/*
prefix.c

Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/
This library is licensed under the BSD license. See the file COPYING.
*/


#include "prefix.h"

#include "textdefs.h"
#include "x86defs.h"
#include "instructions.h"

/*
 * The main purpose of this module is to keep track of all the kinds of prefixes a single instruction may have.
 * The problem is that a single instruction may have up to five different prefix types.
 * That's why I have to detect prefixes of a repeated type and drop the excess ones.
 */

/*
 * Tells whether the byte 'ch' is an instruction prefix.
 * The bytes 0x40-0x4f (REX) count as prefixes only when dt is Decode64Bits,
 * the lock/repeat, segment override, operand size and address size bytes count in every mode.
 * Returns non-zero for a prefix byte and FALSE otherwise.
 */
int is_prefix(unsigned int ch, _DecodeType dt)
{
	/* The whole REX range shares one answer, which depends on the decoding mode only. */
	if ((ch >= 0x40) && (ch <= 0x4f)) return (dt == Decode64Bits);

	switch (ch) {
		/* Lock and repeat: */
		case PREFIX_LOCK:
		case PREFIX_REPNZ:
		case PREFIX_REP:
		/* Segment overrides: */
		case PREFIX_CS:
		case PREFIX_SS:
		case PREFIX_DS:
		case PREFIX_ES:
		case PREFIX_FS:
		case PREFIX_GS:
		/* Operand and address size overrides: */
		case PREFIX_OP_SIZE:
		case PREFIX_ADDR_SIZE:
			return TRUE;
		default:
			break;
	}
	return FALSE;
}

/*
 * Returns the lowest address among the non-NULL pointers a, b, c and d
 * (= the first good prefix to take into account).
 * If all four are NULL, def is returned instead.
 */
static const uint8_t* PREFIX_MIN(const uint8_t* a, const uint8_t* b, const uint8_t* c, const uint8_t* d, const uint8_t* def)
{
	const uint8_t* candidates[4];
	const uint8_t* lowest = NULL;
	unsigned int i;

	candidates[0] = a;
	candidates[1] = b;
	candidates[2] = c;
	candidates[3] = d;

	/*
	 * NULL candidates are simply skipped, so no artificial "max pointer" value is needed
	 * and only the pointers that were really passed in get compared with each other.
	 */
	for (i = 0; i < 4; i++) {
		if (candidates[i] == NULL) continue;
		if ((lowest == NULL) || (candidates[i] < lowest)) lowest = candidates[i];
	}

	return (lowest != NULL) ? lowest : def;
}

/*
 * Classifies a single byte as a prefix.
 * On return pi->flag holds the INST_PRE_* flag of the prefix and pi->type holds the group it belongs to.
 * A byte which is not a prefix leaves them as INST_FLAGS_NONE and PRE_NONE.
 * The REX bytes are recognized only when dt is Decode64Bits.
 *
 * NOTE: AMD treat lock/rep as two different groups... But this code is based on Intel:
 *
 *	- Lock and Repeat:
 *		- 0xF0 - LOCK
 *		- 0xF2 - REPNE/REPNZ
 *		- 0xF3 - REP/REPE/REPZ
 *	- Segment Override:
 *		- 0x2E - CS
 *		- 0x36 - SS
 *		- 0x3E - DS
 *		- 0x26 - ES
 *		- 0x64 - FS
 *		- 0x65 - GS
 *	- Operand-Size Override: 0x66, switching default size.
 *	- Address-Size Override: 0x67, switching default size.
 *
 *	64 Bits:
 *	- REX: 0x40 - 0x4f, extends register access.
 */
void get_prefix_flag(unsigned int ch, _PrefixInfo* pi, _DecodeType dt)
{
	/* Assume the byte is not a prefix until one of the checks below says otherwise. */
	pi->type = PRE_NONE;
	pi->flag = INST_FLAGS_NONE;

	/* REX type, in any other decoding mode these bytes are not prefixes at all. */
	if ((ch >= 0x40) && (ch <= 0x4f)) {
		if (dt == Decode64Bits) {
			pi->type = PRE_REX;
			pi->flag = INST_PRE_REX;
		}
		return;
	}

	switch (ch)
	{
		/* LOCK and REPx type: */
		case PREFIX_LOCK:
			pi->type = PRE_LOKREP;
			pi->flag = INST_PRE_LOCK;
		break;
		case PREFIX_REPNZ:
			pi->type = PRE_LOKREP;
			pi->flag = INST_PRE_REPNZ;
		break;
		case PREFIX_REP:
			pi->type = PRE_LOKREP;
			pi->flag = INST_PRE_REP;
		break;

		/* Segment override type: */
		case PREFIX_CS:
			pi->type = PRE_SEGOVRD;
			pi->flag = INST_PRE_CS;
		break;
		case PREFIX_SS:
			pi->type = PRE_SEGOVRD;
			pi->flag = INST_PRE_SS;
		break;
		case PREFIX_DS:
			pi->type = PRE_SEGOVRD;
			pi->flag = INST_PRE_DS;
		break;
		case PREFIX_ES:
			pi->type = PRE_SEGOVRD;
			pi->flag = INST_PRE_ES;
		break;
		case PREFIX_FS:
			pi->type = PRE_SEGOVRD;
			pi->flag = INST_PRE_FS;
		break;
		case PREFIX_GS:
			pi->type = PRE_SEGOVRD;
			pi->flag = INST_PRE_GS;
		break;

		/* Operand size type: */
		case PREFIX_OP_SIZE:
			pi->type = PRE_OPSIZE;
			pi->flag = INST_PRE_OP_SIZE;
		break;

		/* Address size type: */
		case PREFIX_ADDR_SIZE:
			pi->type = PRE_ADDRSIZE;
			pi->flag = INST_PRE_ADDR_SIZE;
		break;

		/* Anything else is not a prefix, the defaults set above stay. */
		default:
		break;
	}
}

/*
 * Scans the prefix bytes at the beginning of an instruction and records them in ps.
 *
 * code    - first byte to examine.
 * codeLen - number of bytes which may be read starting at code.
 * ps      - prefix state to update:
 *           totalPrefixes accumulates the flags of the prefixes which are taken into account,
 *           lokrepPos/segovrdPos/opsizePos/addrsizePos/rexpos point at the prefix byte of each type,
 *           start points at the first prefix which is taken into account (dropped prefixes are skipped),
 *           last points at the byte following the last prefix byte scanned.
 *           Only start and last are set unconditionally here; the flags and positions are read before they
 *           are written, so they are presumably cleared by the caller before the call.
 * dt      - decoding mode, passed to get_prefix_flag to decide whether REX bytes are prefixes.
 *
 * Scanning stops at the first byte which is not a prefix, when codeLen bytes were consumed,
 * or when INST_MAXIMUM_SIZE bytes were scanned since ps->start.
 */
void decode_prefixes(const uint8_t* code, int codeLen, _PrefixState* ps, _DecodeType dt)
{
	/*
	 * First thing to do, scan for prefixes, there are five types of prefixes.
	 * There may be up to five prefixes before a single instruction, not the same type, no special order,
	 * except REX must precede immediately the first opcode.
	 * BTW - This is the reason why I didn't make the REP prefixes part of the instructions (STOS/SCAS/etc).
	 *
	 * Another thing, the instruction maximum size is 15 bytes, thus if we read more than 15 bytes, we will halt.
	 */

	_PrefixInfo pi;
	ps->start = code;

	/*
	 * Both conditions must hold in order to read another byte:
	 * there is still input left AND we didn't exceed the maximum instruction size yet.
	 * Reading on when only one of them holds would either ignore the size limit
	 * or read beyond the end of the given buffer.
	 */
	while ((--codeLen >= 0) && (code - ps->start < INST_MAXIMUM_SIZE)) {
		/* Examine what type of prefix we got. */
		get_prefix_flag(*code, &pi, dt);
		if (pi.flag == INST_FLAGS_NONE) break; /* Halt scanning. */

		/*
		 * If we got something that is ALREADY included,
		 * we will have to skip that many bytes 'till we get past the first occurrence.
		 * Take a look: XYzABCUVz, the result would be: ABCUVz;  XYz are being dropped (we have to skip them),
		 * because then we would have z included twice, which is not allowed by 80x86.
		 */
		switch (pi.type)
		{
			case PRE_LOKREP:
				if ((ps->totalPrefixes & INST_PRE_LOKREP_MASK) != 0) { /* Is it second time we got this same type prefix? */
					/* Remove all flags of this group, because we don't know which is the set one. */
					ps->totalPrefixes &= ~INST_PRE_LOKREP_MASK;

					/*
					 * Check whether we have to remove other types.
					 * We remove other types if they appeared before the first repeating type.
					 * We also have to update flags and positions.
					 */
					if (ps->segovrdPos && ps->segovrdPos < ps->lokrepPos) {
						/* Update flags, remove any flag that is segment override concerned. */
						ps->totalPrefixes &= ~INST_PRE_SEGOVRD_MASK;
						ps->segovrdPos = NULL;
					}
					if (ps->opsizePos && ps->opsizePos < ps->lokrepPos) {
						ps->totalPrefixes &= ~INST_PRE_OP_SIZE; /* No need for mask, it's a single bit. */
						ps->opsizePos = NULL;
					}
					if (ps->addrsizePos && ps->addrsizePos < ps->lokrepPos) {
						ps->totalPrefixes &= ~INST_PRE_ADDR_SIZE;
						ps->addrsizePos = NULL;
					}
					if (ps->rexpos && ps->rexpos < ps->lokrepPos) {
						ps->totalPrefixes &= ~INST_PRE_REX;
						ps->rexpos = NULL;
					}

					/*
					 * Update current type position to last ^good^ position.
					 * Notice we update the position ONLY after we removed dropped prefixes, so they're dropped relative to the old one.
					 */
					ps->lokrepPos = code;

					/*
					 * start points to the first prefix we take into account (ignoring dropped prefixes).
					 * Notice we do the assignment after the above if statements, because maybe they could affect the result.
					 */
					ps->start = PREFIX_MIN(ps->segovrdPos, ps->opsizePos, ps->addrsizePos, ps->rexpos, ps->lokrepPos);
				} else {
					/* Update position to first occurrence. */
					ps->lokrepPos = code;
				}
				/* Set flags anyways, if it's second time we cleaned the flags of this group already. */
				ps->totalPrefixes |= pi.flag;
			break;
			case PRE_SEGOVRD:
				if ((ps->totalPrefixes & INST_PRE_SEGOVRD_MASK) != 0) {
					ps->totalPrefixes &= ~INST_PRE_SEGOVRD_MASK;

					if (ps->lokrepPos && ps->lokrepPos < ps->segovrdPos) {
						ps->totalPrefixes &= ~INST_PRE_LOKREP_MASK;
						ps->lokrepPos = NULL;
					}
					if (ps->opsizePos && ps->opsizePos < ps->segovrdPos) {
						ps->totalPrefixes &= ~INST_PRE_OP_SIZE;
						ps->opsizePos = NULL;
					}
					if (ps->addrsizePos && ps->addrsizePos < ps->segovrdPos) {
						ps->totalPrefixes &= ~INST_PRE_ADDR_SIZE;
						ps->addrsizePos = NULL;
					}
					if (ps->rexpos && ps->rexpos < ps->segovrdPos) {
						ps->totalPrefixes &= ~INST_PRE_REX;
						ps->rexpos = NULL;
					}
					ps->segovrdPos = code;
					ps->start = PREFIX_MIN(ps->lokrepPos, ps->opsizePos, ps->addrsizePos, ps->rexpos, ps->segovrdPos);
				} else {
					ps->segovrdPos = code;
				}
				ps->totalPrefixes |= pi.flag;
			break;
			case PRE_OPSIZE:
				/* Single bit group, the flag stays set on a repeated occurrence, only the positions change. */
				if (ps->totalPrefixes & pi.flag) {
					if (ps->lokrepPos && ps->lokrepPos < ps->opsizePos) {
						ps->totalPrefixes &= ~INST_PRE_LOKREP_MASK;
						ps->lokrepPos =	NULL;
					}
					if (ps->segovrdPos && ps->segovrdPos < ps->opsizePos) {
						ps->totalPrefixes &= ~INST_PRE_SEGOVRD_MASK;
						ps->segovrdPos = NULL;
					}
					if (ps->addrsizePos && ps->addrsizePos < ps->opsizePos) {
						ps->totalPrefixes &= ~INST_PRE_ADDR_SIZE;
						ps->addrsizePos = NULL;
					}
					if (ps->rexpos && ps->rexpos < ps->opsizePos) {
						ps->totalPrefixes &= ~INST_PRE_REX;
						ps->rexpos = NULL;
					}
					ps->opsizePos = code;
					ps->start = PREFIX_MIN(ps->lokrepPos, ps->segovrdPos, ps->addrsizePos, ps->rexpos, ps->opsizePos);
				} else {
					ps->totalPrefixes |= pi.flag;
					ps->opsizePos = code;
				}
			break;
			case PRE_ADDRSIZE:
				if (ps->totalPrefixes & pi.flag) {
					if (ps->lokrepPos && ps->lokrepPos < ps->addrsizePos) {
						ps->totalPrefixes &= ~INST_PRE_LOKREP_MASK;
						ps->lokrepPos = NULL;
					}
					if (ps->segovrdPos && ps->segovrdPos < ps->addrsizePos) {
						ps->totalPrefixes &= ~INST_PRE_SEGOVRD_MASK;
						ps->segovrdPos = NULL;
					}
					if (ps->opsizePos && ps->opsizePos < ps->addrsizePos) {
						ps->totalPrefixes &= ~INST_PRE_OP_SIZE;
						ps->opsizePos = NULL;
					}
					if (ps->rexpos && ps->rexpos < ps->addrsizePos) {
						ps->totalPrefixes &= ~INST_PRE_REX;
						ps->rexpos = NULL;
					}
					ps->addrsizePos = code;
					ps->start = PREFIX_MIN(ps->lokrepPos, ps->segovrdPos, ps->opsizePos, ps->rexpos, ps->addrsizePos);
				} else {
					ps->totalPrefixes |= pi.flag;
					ps->addrsizePos = code;
				}
			break;
			case PRE_REX:
				if (ps->totalPrefixes & pi.flag) {
					if (ps->lokrepPos && ps->lokrepPos < ps->rexpos) {
						ps->totalPrefixes &= ~INST_PRE_LOKREP_MASK;
						ps->lokrepPos = NULL;
					}
					if (ps->segovrdPos && ps->segovrdPos < ps->rexpos) {
						ps->totalPrefixes &= ~INST_PRE_SEGOVRD_MASK;
						ps->segovrdPos = NULL;
					}
					if (ps->opsizePos && ps->opsizePos < ps->rexpos) {
						ps->totalPrefixes &= ~INST_PRE_OP_SIZE;
						ps->opsizePos = NULL;
					}
					if (ps->addrsizePos && ps->addrsizePos < ps->rexpos) {
						ps->totalPrefixes &= ~INST_PRE_ADDR_SIZE;
						ps->addrsizePos = NULL;
					}
					ps->rexpos = code;
					ps->start = PREFIX_MIN(ps->lokrepPos, ps->segovrdPos, ps->opsizePos, ps->addrsizePos, ps->rexpos);
				} else {
					ps->totalPrefixes |= pi.flag;
					ps->rexpos = code;
				}
			break;
			default:
				/*
				 * Not expected, get_prefix_flag yields only the five types handled above together with a flag.
				 * Still, record where the scanning stopped, so ps->last is never left unset.
				 */
				ps->last = code;
			return;
		}
		code++;
	}
	/*
	 * Save last byte scanned address, so the decoder could keep on scanning from this point and on and on and on.
	 * In addition the decoder is able to know that the last byte could lead to MMX/SSE instructions (preceding REX if exists).
	 */
	ps->last = code; /* ps->last points to the next byte following the last prefix byte! */
}

/*
 * This function gets the prefix state of the last instruction, that is the total prefixes
 * which we got from the code itself and the USED prefixes (which were marked by the decoder, e.g. extract_operand),
 * and finds every prefix which wasn't used, so the caller can DB it.
 *
 * unusedList - receives the byte values of the unused prefixes, sorted by their position in the code;
 *              the whole array (MAX_PREFIXES bytes) is zeroed first.
 * ps         - prefix state; ps->unusedCount is set to the number of entries stored in unusedList.
 *              Side effect: ps->totalPrefixes is modified (some flags are reenabled, used flags are removed).
 *
 * Note: This function should get the valid prefixes which weren't dropped.
 *       The Decode function deals with the dropped ones by itself.
 *
 * Here is a small sample for showing a case where there are some prefixes which weren't used, and should be DB'ed.
 *
 * 16 bits decoding mode:
 * 40 ~ INC AX
 * 66 40 ~ INC EAX
 * 66 2E 40 ~ DB 2E; INC EAX
 * 67 2E ~ DB 67; INC AX
 *
 * The whole thing is working by using the usedPrefixes, this variable gets updated
 * whenever the decoder mechanism is affected by any one of the totalPrefixes prefixes.
 * There are a few spots in the code along some of the decoder functions which are responsible for updating the usedPrefixes.
 *
 * When calling this function, you assume totalPrefixes contains all non-dropped prefixes for that instruction,
 * and also the usedPrefixes is filled already.
 */

void get_unused_prefixes_list(uint8_t unusedList[MAX_PREFIXES], _PrefixState* ps)
{
	/* We might have 5 unused prefixes at most, up to 5 prefixes for one instruction. */
	const uint8_t* ptrs[MAX_PREFIXES] = {0};

	unsigned int i, j;
	const uint8_t* tmp = NULL;

	memset(unusedList, 0, MAX_PREFIXES);

	/*
	 * We have to restore at this point flags that were disabled manually, because they are ignored in 64 bits.
	 * Therefore, we will have to check whether they were set before we disabled them and now reenable them in order
	 * to output them as unused prefixes.
	 */

	/* Check out whether the REX prefix was ignored... and reenable it, so we can see if it were used or not. */
	if (ps->rexpos != NULL) {
		ps->totalPrefixes |= INST_PRE_REX;
		/* Reenable operand size prefix, so it will be dropped as unused. */
		if ((ps->opsizePos != NULL) && (*ps->rexpos & PREFIX_REX_W)) ps->totalPrefixes |= INST_PRE_OP_SIZE;
	}
	/* It could be that we disabled any one of the segment overrides. */
	if ((ps->segovrdPos != NULL) && ((ps->totalPrefixes & (INST_PRE_SEGOVRD_MASK)) == 0)) {
		 /*
		  * We have to reenable it manually.
		  * Doesn't really matter which one we use to enable it.
		  */
		ps->totalPrefixes |= INST_PRE_CS;
	}

	/* Remove all used prefixes. */
	ps->totalPrefixes &= ~ps->usedPrefixes;

	/* Caller function depends on this value, so it will know how many unused prefixes there are. */
	ps->unusedCount = 0;

	/*
	 * All are used? Cool then.
	 * The used flags were removed above already, so whatever is left in totalPrefixes is unused.
	 */
	if (ps->totalPrefixes == 0) return ;

	/*
	 * Determine what types of prefixes were unused.
	 * Every type has only one pointer to that prefix.
	 */
	if (ps->totalPrefixes & INST_PRE_OP_SIZE) ptrs[ps->unusedCount++] = ps->opsizePos;
	if (ps->totalPrefixes & INST_PRE_ADDR_SIZE) ptrs[ps->unusedCount++] = ps->addrsizePos;
	if (ps->totalPrefixes & INST_PRE_LOKREP_MASK) ptrs[ps->unusedCount++] = ps->lokrepPos;
	if (ps->totalPrefixes & INST_PRE_SEGOVRD_MASK) ptrs[ps->unusedCount++] = ps->segovrdPos;
	if (ps->totalPrefixes & INST_PRE_REX) ptrs[ps->unusedCount++] = ps->rexpos;

	/*
	 * Sort them by address (ascending), so you output them by their real order.
	 * There are a handful of entries at most, a simple insertion sort is good enough.
	 */
	for (i = 1; i < ps->unusedCount; i++) {
		tmp = ptrs[i];
		for (j = i; (j > 0) && (ptrs[j - 1] > tmp); j--)
			ptrs[j] = ptrs[j - 1];
		ptrs[j] = tmp;
	}

	/* Get values and store in the given array. */
	for (i = 0; i < ps->unusedCount; i++)
		unusedList[i] = *ptrs[i];
}


/*
 * Concatenates to s the text of the segment override which is set in the prefixes, followed by SEG_OFF_CHR.
 * Nothing is appended unless the segment override flags are exactly one of the known segment flags.
 * When dt is Decode64Bits only FS and GS are taken into account, the CS/SS/DS/ES overrides are ignored.
 * Side Effects - the segment prefix which got printed is marked in ps->usedPrefixes.
 */
void str_seg_text(_WString* s, _PrefixState* ps, _DecodeType dt)
{
	_iflags flags = ps->totalPrefixes & INST_PRE_SEGOVRD_MASK;

	/* First decide whether there is anything to print at all. */
	switch (flags)
	{
		case INST_PRE_CS:
		case INST_PRE_SS:
		case INST_PRE_DS:
		case INST_PRE_ES:
			/* 64 bits mode ignores these segment overrides. */
			if (dt == Decode64Bits) return;
		break;
		case INST_PRE_FS:
		case INST_PRE_GS:
			/* Honored in every decoding mode. */
		break;
		default:
			/* No segment override, or not a single known one. */
		return;
	}

	/* At this point flags is exactly the flag of the segment which gets printed. */
	ps->usedPrefixes |= flags;

	switch (flags)
	{
		case INST_PRE_CS: strcat_WSN(s, PREFIX_CS_TEXT); break;
		case INST_PRE_SS: strcat_WSN(s, PREFIX_SS_TEXT); break;
		case INST_PRE_DS: strcat_WSN(s, PREFIX_DS_TEXT); break;
		case INST_PRE_ES: strcat_WSN(s, PREFIX_ES_TEXT); break;
		case INST_PRE_FS: strcat_WSN(s, PREFIX_FS_TEXT); break;
		case INST_PRE_GS: strcat_WSN(s, PREFIX_GS_TEXT); break;
		default: break;
	}
	chrcat_WS(s, SEG_OFF_CHR);
}
