/*
instructions.h

Copyright (C) 2003-2008 Gil Dabah, http://ragestorm.net/distorm/
This library is licensed under the BSD license. See the file COPYING.
*/


#ifndef INSTRUCTIONS_H
#define INSTRUCTIONS_H

#include "config.h"

#include "decoder.h"
#include "prefix.h"

/*
 * Operand type possibilities.
 * These values are what the uint8_t operand fields of _InstInfo/_InstInfoEx
 * (s, d, op3, op4) are annotated as holding.
 *
 * Note: the "_FULL" suffix indicates that the operand is decoded as 16 bits or 32 bits depending on DecodeType -
 * actually, it depends on the decoding mode, unless there's an operand/address size prefix.
 * For example, the code: 33 c0 could be decoded/executed as XOR AX, AX or XOR EAX, EAX.
 */
typedef enum OpType {
	/* No operand is set */
	OT_NONE = 0,

	/* Read a byte (8 bits) immediate */
	OT_IMM8,
	/* Force a read of a word (16 bits) immediate, used by ret only */
	OT_IMM16,
	/* Read a word/dword immediate */
	OT_IMM_FULL,
	/* Read a double-word (32 bits) immediate */
	OT_IMM32,

	/* Special immediate for two instructions, AAM and AAD, which will output the byte only if it's not 0xa (base 10) */
	OT_IMM_AADM,

	/* Read a sign-extended byte (8 bits) immediate */
	OT_SEIMM8,

	/* Use an 8bit register */
	OT_REG8,
	/* Use a 16bit register */
	OT_REG16,
	/* Use a 16/32/64bit register */
	OT_REG_FULL,
	/* Use a 32bit register */
	OT_REG32,
	/* MOVSXD uses 64 bits register */
	OT_REG64,
	/*
	 * If used with REX the reg operand size becomes 64 bits, otherwise 32 bits.
	 * VMX instructions are promoted automatically without a REX prefix.
	 */
	OT_REG32_64,
	/* Extract a 32bit register from the RM field, used for instructions with register operands only */
	OT_REG32_RM,
	/* Used only by MOV CR/DR(n). Promoted with REX only. */
	OT_FREG32_64_RM,

	/* Use or read (indirection) an 8bit register or immediate byte */
	OT_RM8,
	/* Some instructions force 16 bits (mov sreg, rm16) */
	OT_RM16,
	/* Use or read a 16/32/64bit register or immediate word/dword/qword */
	OT_RM_FULL,
	/* Use or read a 32bit register or immediate dword */
	OT_RM32,
	/*
	 * 32 or 64 bits (with REX) operand size indirection memory operand.
	 * Some instructions are promoted automatically without a REX prefix.
	 */
	OT_RM32_64,
	/*
	 * 16 or 32 bits RM. This is used only with the MOVSXD instruction in 64 bits mode.
	 * NOTE(review): this comment originally read "MOVZXD", presumably a typo for MOVSXD -
	 * confirm against the instructions table.
	 */
	OT_RM16_32,
	/* Same as OT_RMXX but POINTS to 16 bits [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM16,
	/* Same as OT_RMXX but POINTS to 32 bits (single precision) [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM32,
	/* Same as OT_RMXX but POINTS to 64 bits (double precision) [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM64,
	/* Same as OT_RMXX but POINTS to 80 bits (extended precision) [cannot use GENERAL-PURPOSE REG!] */
	OT_FPUM80,

	/*
	 * Special operand type for SSE4 where the ModR/M might
	 * be a 32 bits register or 8 bits memory indirection operand.
	 */
	OT_R32_M8,
	/*
	 * Special ModR/M for PINSRW, which needs a 16 bits memory operand or 32 bits register.
	 * In 16 bits decoding mode R32 becomes R16, operand size cannot affect this.
	 */
	OT_R32_M16,
	/*
	 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or
	 * an 8 bits memory indirection operand.
	 */
	OT_R32_64_M8,
	/*
	 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or
	 * a 16 bits memory indirection operand.
	 */
	OT_R32_64_M16,
	/*
	 * Special operand type for MOV reg16/32/64/mem16, segReg 8C /r. and SMSW.
	 * It supports all decoding modes, but if used as a memory indirection it's a 16 bit ModR/M indirection.
	 */
	OT_RFULL_M16,

	/* Use a control register */
	OT_CREG,
	/* Use a debug register */
	OT_DREG,
	/* Use a segment register */
	OT_SREG,
	/*
	 * SEG is encoded in the flags of the opcode itself!
	 * This is used for specific "push SS" where SS is a segment where
	 * each "push SS" has an absolutely different opcode byte.
	 * We need this to detect whether an operand size prefix is used.
	 */
	OT_SEG,
	
	/* Use AL */
	OT_ACC8,
	/* Use AX (FSTSW) */
	OT_ACC16,
	/* Use AX/EAX/RAX */
	OT_ACC_FULL,
	/* Use AX/EAX, no REX is possible for RAX, used only with IN/OUT which don't support 64 bit registers */
	OT_ACC_FULL_NOT64,

	/*
	 * Read one word (seg), and a word/dword/qword (depends on operand size) from memory.
	 * JMP FAR [EBX] means EBX points to a 16:32 ptr.
	 */
	OT_MEM16_FULL,
	/* Read one word (seg) and a word/dword/qword (depends on operand size), usually SEG:OFF, JMP 1234:1234 */
	OT_PTR16_FULL,
	/* Read one word (limit) and a dword/qword (base) (depends on operand size), used by SGDT, SIDT, LGDT, LIDT. */
	OT_MEM16_3264,

	/* Read a byte (8 bits) immediate and calculate it relatively to the current offset of the instruction being decoded */
	OT_RELCB,
	/* Read a word/dword immediate and calculate it relatively to the current offset of the instruction being decoded */
	OT_RELC_FULL,

	/* Use general memory indirection, with varying sizes: */
	OT_MEM,
	OT_MEM32,
	/* Memory dereference for MOVNTI, either 32 or 64 bits (with REX). */
	OT_MEM32_64,
	OT_MEM64,
	OT_MEM128,
	/* Used for cmpxchg8b/16b. */
	OT_MEM64_128,

	/* Read an immediate as an absolute address, size is known by instruction, used by MOV (offset) only */
	OT_MOFFS,
	/* Use an immediate of 1, as for SHR R/M, 1 */
	OT_CONST1,
	/* Use CL, as for SHR R/M, CL */
	OT_REGCL,

	/*
	 * Instruction-Block for one byte long instructions, used by INC/DEC/PUSH/POP/XCHG,
	 * REG is extracted from the value of opcode
	 * Use an 8bit register
	 */
	OT_IB_RB,
	/* Use a 32 or 64bit (with REX) register, used by BSWAP */
	OT_IB_R_DW_QW,
	/* Use a 16/32/64bit register */
	OT_IB_R_FULL,

	/* Use [(r)SI] as INDIRECTION, for repeatable instructions */
	OT_REGI_ESI,
	/* Use [(r)DI] as INDIRECTION, for repeatable instructions */
	OT_REGI_EDI,
	/* Use [(r)BX + AL] as INDIRECTION, used by XLAT only */
	OT_REGI_EBXAL,
	/* Use [(r)AX] as INDIRECTION, used by AMD's SVM instructions */
	OT_REGI_EAX,
	/* Use DX, as for OUTS DX, BYTE [SI] */
	OT_REGDX,
	/* Use ECX in INVLPGA instruction */
	OT_REGECX,

	/* FPU registers: */
	OT_FPU_SI, /* ST(i) */
	OT_FPU_SSI, /* ST(0), ST(i) */
	OT_FPU_SIS, /* ST(i), ST(0) */

	/* MMX registers: */
	OT_MM,
	/* Extract the MMX register from the RM bits this time (used when the REG bits are used for opcode extension) */
	OT_MM_RM,
	/* ModR/M points to 32 bits MMX variable */
	OT_MM32,
	/* ModR/M points to 64 bits MMX variable */
	OT_MM64,

	/* SSE registers: */
	OT_XMM,
	/* Extract the SSE register from the RM bits this time (used when the REG bits are used for opcode extension) */
	OT_XMM_RM,
	/* ModR/M points to 16 bits SSE variable */
	OT_XMM16,
	/* ModR/M points to 32 bits SSE variable */
	OT_XMM32,
	/* ModR/M points to 64 bits SSE variable */
	OT_XMM64,
	/* ModR/M points to 128 bits SSE variable */
	OT_XMM128,
	/* Implied XMM0 register as operand, used in SSE4. */
	OT_REGXMM0,

	/*
	 * DUMMY for cases like CALL WORD [BX+DI], where we would like to omit this "WORD". It's useless,
	 * because the DWORD/WORD/BYTE mechanism is being done automatically, so we need some way to disable it in such cases...
	 */
	OT_DUMMY
} _OpType;

/*
 * Flags for instruction.
 * Apart from INST_FLAGS_NONE and INST_EXCLUDE_MODRM, every flag below is a
 * distinct single bit (bits 0..27), so flags can be combined with bitwise OR.
 */

/*
 * Empty flags indicator.
 * Note it is -1 converted to _iflags, not 0 (0 is the value of INST_EXCLUDE_MODRM below).
 */
#define INST_FLAGS_NONE ((_iflags)-1)

/*
 * Explicitly define that the instruction doesn't require a ModRM byte.
 * NOTE its value is 0! You can't do much with it (it cannot be tested with a bitwise AND);
 * it is used for instructions that for sure don't use the ModR/M byte.
 */
#define INST_EXCLUDE_MODRM (0)
/* The instruction we are going to decode has a ModR/M byte. */
#define INST_INCLUDE_MODRM (1)
/* Special treatment for instructions which are in the divided-category but still need the whole byte for ModR/M... */
#define INST_NOT_DIVIDED (1 << 1)
/*
 * Used explicitly in repeatable instructions,
 * which need a suffix letter in their mnemonic to specify operation-size (depends on operands).
 */
#define INST_16BITS (1 << 2)
/* Set if the opcode is supported by the 80286 and later models (16/32 bits). */
#define INST_32BITS (1 << 3)
/*
 * Prefix flags (4 types: lock/rep, seg override, addr-size, oper-size)
 * There are several specific instructions that can follow the LOCK prefix,
 * note that they must be using a memory operand form, otherwise they generate an exception.
 */
#define INST_PRE_LOCK (1 << 4)
/* REPNZ prefix for string instructions only - means an instruction can follow it. */
#define INST_PRE_REPNZ (1 << 5)
/* REP prefix for string instructions only - means an instruction can follow it. */
#define INST_PRE_REP (1 << 6)
/* CS override prefix. */
#define INST_PRE_CS (1 << 7)
/* SS override prefix. */
#define INST_PRE_SS (1 << 8)
/* DS override prefix. */
#define INST_PRE_DS (1 << 9)
/* ES override prefix. */
#define INST_PRE_ES (1 << 10)
/* FS override prefix. Funky Segment :) */
#define INST_PRE_FS (1 << 11)
/* GS override prefix. Groovy Segment, of course not, duh ! */
#define INST_PRE_GS (1 << 12)
/* Switch operand size from 32 to 16 and vice versa. */
#define INST_PRE_OP_SIZE (1 << 13)
/* Switch address size from 32 to 16 and vice versa. */
#define INST_PRE_ADDR_SIZE (1 << 14)
/* Native instructions which need a suffix letter to indicate their operation-size (and don't depend on operands). */
#define INST_NATIVE (1 << 15)
/* Use extended mnemonic, means it's an _InstInfoEx structure, which contains another mnemonic for 32 bits specifically. */
#define INST_USE_EXMNEMONIC (1 << 16)
/* Use third operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */
#define INST_USE_OP3 (1 << 17)
/* Use fourth operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */
#define INST_USE_OP4 (1 << 18)
/* The instruction's mnemonic depends on the mod value of the ModR/M byte (mod=11, mod!=11). */
#define INST_MODRM_BASED (1 << 19)
/* The instruction uses a ModR/M byte in which the MOD must be 11 (for register operands only). */
#define INST_MODRR (1 << 20)
/* Because of the way 3DNow! instructions are built, we have to handle locating them specially. Suffix imm8 tells which instruction it is. */
#define INST_3DNOW_FETCH (1 << 21)
/* The instruction needs two suffixes, one for the comparison type (imm8) and the second for its operation size indication (second mnemonic). */
#define INST_PSEUDO_OPCODE (1 << 22)
/* Invalid instruction at 64 bits decoding mode. */
#define INST_INVALID_64BITS (1 << 23)
/* The specific instruction can be promoted to 64 bits (without REX it is promoted automatically). */
#define INST_64BITS (1 << 24)
/* Indicates the instruction must be REX prefixed in order to use 64 bits operands. */
#define INST_PRE_REX (1 << 25)
/* Third mnemonic is set. */
#define INST_USE_EXMNEMONIC2 (1 << 26)
/* Instruction is only valid in 64 bits decoding mode. */
#define INST_64BITS_FETCH (1 << 27)

/* Combined masks: both repeat prefixes, lock plus the repeat prefixes, and all six segment-override prefixes. */
#define INST_PRE_REPS (INST_PRE_REPNZ | INST_PRE_REP)
#define INST_PRE_LOKREP_MASK (INST_PRE_LOCK | INST_PRE_REPNZ | INST_PRE_REP)
#define INST_PRE_SEGOVRD_MASK (INST_PRE_CS | INST_PRE_SS | INST_PRE_DS | INST_PRE_ES | INST_PRE_FS | INST_PRE_GS)

/*
 * Instruction set classes.
 * These are plain sequential identifiers (1..15), not bit flags, so they cannot be OR'ed together.
 * NOTE(review): presumably stored in the uint8_t `isc` field of _InstInfo - confirm against the instructions DB.
 */
/* Indicates the instruction belongs to the General Integer set. */
#define ISCT_INTEGER 1
/* Indicates the instruction belongs to the 387 FPU set. */
#define ISCT_FPU 2
/* Indicates the instruction belongs to the P6 set. */
#define ISCT_P6 3
/* Indicates the instruction belongs to the MMX set. */
#define ISCT_MMX 4
/* Indicates the instruction belongs to the SSE set. */
#define ISCT_SSE 5
/* Indicates the instruction belongs to the SSE2 set. */
#define ISCT_SSE2 6
/* Indicates the instruction belongs to the SSE3 set. */
#define ISCT_SSE3 7
/* Indicates the instruction belongs to the SSSE3 set. */
#define ISCT_SSSE3 8
/* Indicates the instruction belongs to the SSE4.1 set. */
#define ISCT_SSE4_1 9
/* Indicates the instruction belongs to the SSE4.2 set. */
#define ISCT_SSE4_2 10
/* Indicates the instruction belongs to AMD's SSE4.A set. */
#define ISCT_SSE4_A 11
/* Indicates the instruction belongs to the 3DNow! set. */
#define ISCT_3DNOW 12
/* Indicates the instruction belongs to the 3DNow! Extensions set. */
#define ISCT_3DNOWEXT 13
/* Indicates the instruction belongs to the VMX (Intel) set. */
#define ISCT_VMX 14
/* Indicates the instruction belongs to the SVM (AMD) set. */
#define ISCT_SVM 15

/*
 * Tells which operand of the instruction is currently being decoded:
 * destination (1st), source (2nd), op3 (3rd) or op4 (4th).
 * The decode-operands function mainly needs it to recognize the first operand
 * (first operand + it is an indirection + a lock prefix is present).
 */
typedef enum {
	ONT_NONE = -1,
	ONT_1 = 0, /* Destination (1st operand). */
	ONT_2 = 1, /* Source (2nd operand). */
	ONT_3 = 2, /* op3 (3rd operand). */
	ONT_4 = 3  /* op4 (4th operand). */
} _OperandNumberType;

/*
 * NOTE(review): presumably the upper bound on the length of a mnemonic's text;
 * whether it counts a terminator/length byte is not visible here - confirm at the use sites.
 */
#define MAX_MNEMONIC_LENGTH (32)

#ifdef _MSC_VER
 #pragma pack(push, 1)
#endif

/*
 * BASE record of the instructions DB: the operand types of an instruction,
 * its name in text and its flags.
 * It describes a DB entry and is NOT used for the disassembled result code!
 * Extensions of this structure are declared below.
 */

typedef struct _PACKED_ {
	uint8_t type;
	uint8_t isc;
	/* The two operand types, each holding an OpType value. */
	uint8_t s;
	uint8_t d;
	/* Name of the instruction in text. */
	int8_t *mnemonic;
	/* Flags of the instruction. */
	_iflags flags;
} _InstInfo;

/*
 * Extended instruction info.
 *
 * Only a handful of instructions need a second mnemonic (for 32 bits) or a
 * third one (for 64 bits), so the second mnemonic may be empty while the
 * third is set. The first mnemonic is the default in every decoding mode,
 * and a flag tells when another mnemonic is used.
 *
 * A couple of (SSE4) instructions in the whole DB need both op3 and the 3rd
 * mnemonic for 64bits, hence all the extra info lives in this one structure.
 * A few instructions (SHLD/SHRD/IMUL and SSE too) use a third operand
 * (or a fourth); a flag tells when a third/fourth operand is used.
 *
 * The leading fields repeat those of _InstInfo in the same order.
 */
typedef struct _PACKED_ {
	uint8_t type;
	uint8_t isc;
	/* The two operand types, each holding an OpType value. */
	uint8_t s;
	uint8_t d;
	int8_t *mnemonic;
	_iflags flags;
	/* Third and fourth operand types, each holding an OpType value. */
	uint8_t op3;
	uint8_t op4;
	/* Second and third mnemonics. */
	int8_t *mnemonic2;
	int8_t *mnemonic3;
} _InstInfoEx;

/* Kinds of nodes found in the instructions trie data structure: */
typedef enum {
	/* Not exists (this is used for a return code only). */
	INT_NOTEXISTS = -1,
	/* No instruction info or list set. */
	INT_NONE = 0,
	/* It's an instruction info. */
	INT_INFO = 1,
	/* The three list flavors. */
	INT_LIST_GROUP = 2,
	INT_LIST_FULL = 3,
	INT_LIST_DIVIDED = 4
} _InstNodeType;

/*
 * One node of the instructions DB.
 * Depending on `type` it acts either as a node or as an info.
 */
typedef struct _PACKED_ InstNode {
	uint8_t type;
	uint8_t *ids;
	/* The second level might point to _InstNode, this is determined by type in runtime. */
	_InstInfo **list;
} _InstNode;

#ifdef _MSC_VER
 #pragma pack(pop)
#endif

/* Operand size selector; the numeric suffix of each constant is the size it names. */
typedef enum {
	OPERAND_SIZE_NONE = 0,
	OPERAND_SIZE8 = 1,
	OPERAND_SIZE16 = 2,
	OPERAND_SIZE32 = 3,
	OPERAND_SIZE64 = 4,
	OPERAND_SIZE80 = 5,
	OPERAND_SIZE128 = 6
} _OperandSizeType;

/*
 * Used for letting the extract operand know the type of operands without knowing the
 * instruction itself yet, because of the way those instructions work.
 * Only declared here; the object is defined in another translation unit.
 */
extern _InstInfo II_3dnow;

/*
 * Locate the instruction info for the code being decoded.
 * NOTE(review): code, codeLen and codeOffset are passed by pointer, so they are presumably
 * updated as bytes are consumed, and instructionHex presumably receives the hex text of those
 * bytes - confirm against the definition, along with the return value when nothing matches.
 */
_InstInfo* locate_inst(const uint8_t** code, int* codeLen, _OffsetType* codeOffset, _WString* instructionHex, _PrefixState* ps, _DecodeType dt);
/*
 * Locate the instruction info of a 3DNow! instruction.
 * NOTE(review): presumably used for instructions flagged INST_3DNOW_FETCH, whose suffix imm8
 * tells which instruction it is - confirm against the definition.
 */
_InstInfo* locate_3dnow_inst(_CodeInfo* ci, _WString* instructionHex);

/* Concatenates to s a text describing the size used for the indirection form (MOV *WORD* [BX], 0x12) when it's not clear from the operands. */
void str_indirection_text(_WString* s, _OperandSizeType opSize);

#endif /* INSTRUCTIONS_H */

