/*
instructions.h
diStorm3 - Powerful disassembler for X86/AMD64
http://ragestorm.net/distorm/
distorm at gmail dot com
Copyright (C) 2003-2016 Gil Dabah
This library is licensed under the BSD license. See the file COPYING.
*/
#ifndef INSTRUCTIONS_H
#define INSTRUCTIONS_H
#include "config.h"
#include "prefix.h"
/*
 * Operand type possibilities:
 * Note: the "_FULL" suffix means the operand is decoded as 16 bits or 32 bits depending on DecodeType -
 * actually, it depends on the decoding mode, unless there's an operand/address size prefix.
 * For example, the code: 33 c0 could be decoded/executed as XOR AX, AX or XOR EAX, EAX.
 *
 * The enumerators are numbered implicitly from OT_NONE = 0, so their order defines their values.
 * OpType values are stored in uint8_t fields (_InstSharedInfo.s/d, _InstInfoEx.op3/op4),
 * therefore the number of enumerators has to stay below 256.
 * NOTE(review): the static instruction tables presumably embed these values - confirm before reordering.
 */
typedef enum OpType {
/* No operand is set */
OT_NONE = 0,
/* Read a byte (8 bits) immediate */
OT_IMM8,
/* Force a read of a word (16 bits) immediate, used by ret only */
OT_IMM16,
/* Read a word/dword immediate */
OT_IMM_FULL,
/* Read a double-word (32 bits) immediate */
OT_IMM32,
/* Read a sign-extended byte (8 bits) immediate */
OT_SEIMM8,
/*
 * Special immediates for instructions which have more than one immediate,
 * which is an exception from the standard instruction format.
 * As of version v1.0: ENTER, INSERTQ, EXTRQ are the only problematic ones.
 */
/* 16 bits immediate using the first imm-slot */
OT_IMM16_1,
/* 8 bits immediate using the first imm-slot */
OT_IMM8_1,
/* 8 bits immediate using the second imm-slot */
OT_IMM8_2,
/* Use an 8bit register */
OT_REG8,
/* Use a 16bit register */
OT_REG16,
/* Use a 16/32/64bit register */
OT_REG_FULL,
/* Use a 32bit register */
OT_REG32,
/*
 * If used with REX the reg operand size becomes 64 bits, otherwise 32 bits.
 * VMX instructions are promoted automatically without a REX prefix.
 */
OT_REG32_64,
/* Used only by MOV CR/DR(n). Promoted with REX only. */
OT_FREG32_64_RM,
/* Use or read (indirection) an 8bit register or immediate byte */
OT_RM8,
/* Some instructions force 16 bits (mov sreg, rm16) */
OT_RM16,
/* Use or read a 16/32/64bit register or immediate word/dword/qword */
OT_RM_FULL,
/*
 * 32 or 64 bits (with REX) operand size indirection memory operand.
 * Some instructions are promoted automatically without a REX prefix.
 */
OT_RM32_64,
/*
 * 16 or 32 bits RM. This is used only with the MOVSXD instruction in 64bits.
 * NOTE(review): this comment used to say "MOVZXD", which is not an x86 mnemonic - confirm against the instruction DB.
 */
OT_RM16_32,
/* Same as OT_RMXX but POINTS to 16 bits [cannot use GENERAL-PURPOSE REG!] */
OT_FPUM16,
/* Same as OT_RMXX but POINTS to 32 bits (single precision) [cannot use GENERAL-PURPOSE REG!] */
OT_FPUM32,
/* Same as OT_RMXX but POINTS to 64 bits (double precision) [cannot use GENERAL-PURPOSE REG!] */
OT_FPUM64,
/* Same as OT_RMXX but POINTS to 80 bits (extended precision) [cannot use GENERAL-PURPOSE REG!] */
OT_FPUM80,
/*
 * Special operand type for SSE4 where the ModR/M might
 * be a 32 bits register or 8 bits memory indirection operand.
 */
OT_R32_M8,
/*
 * Special ModR/M for PINSRW, which needs a 16 bits memory operand or 32 bits register.
 * In 16 bits decoding mode R32 becomes R16, operand size cannot affect this.
 */
OT_R32_M16,
/*
 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or
 * an 8 bits memory indirection operand.
 */
OT_R32_64_M8,
/*
 * Special type for SSE4, ModR/M might be a 32 bits or 64 bits (with REX) register or
 * a 16 bits memory indirection operand.
 */
OT_R32_64_M16,
/*
 * Special operand type for MOV reg16/32/64/mem16, segReg 8C /r. and SMSW.
 * It supports all decoding modes, but if used as a memory indirection it's a 16 bit ModR/M indirection.
 */
OT_RFULL_M16,
/* Use a control register */
OT_CREG,
/* Use a debug register */
OT_DREG,
/* Use a segment register */
OT_SREG,
/*
 * SEG is encoded in the flags of the opcode itself!
 * This is used for specific "push SS" where SS is a segment where
 * each "push SS" has an absolutely different opcode byte.
 * We need this to detect whether an operand size prefix is used.
 */
OT_SEG,
/* Use AL */
OT_ACC8,
/* Use AX (FSTSW) */
OT_ACC16,
/* Use AX/EAX/RAX */
OT_ACC_FULL,
/* Use AX/EAX, no REX is possible for RAX, used only with IN/OUT which don't support 64 bit registers */
OT_ACC_FULL_NOT64,
/*
 * Read one word (seg), and a word/dword/qword (depends on operand size) from memory.
 * JMP FAR [EBX] means EBX points to a 16:32 ptr.
 */
OT_MEM16_FULL,
/* Read one word (seg) and a word/dword/qword (depends on operand size), usually SEG:OFF, JMP 1234:1234 */
OT_PTR16_FULL,
/* Read one word (limit) and a dword/qword (base) (depends on operand size), used by SGDT, SIDT, LGDT, LIDT. */
OT_MEM16_3264,
/* Read a byte (8 bits) immediate and calculate it relatively to the current offset of the instruction being decoded */
OT_RELCB,
/* Read a word/dword immediate and calculate it relatively to the current offset of the instruction being decoded */
OT_RELC_FULL,
/* Use general memory indirection, with varying sizes: */
OT_MEM,
/* Used when a memory indirection is required, but if the mod field is 11, this operand will be ignored. */
OT_MEM_OPT,
OT_MEM32,
/* Memory dereference for MOVNTI, either 32 or 64 bits (with REX). */
OT_MEM32_64,
OT_MEM64,
OT_MEM128,
/* Used for cmpxchg8b/16b. */
OT_MEM64_128,
/* Read an immediate as an absolute address, size is known by instruction, used by MOV (memory offset) only */
OT_MOFFS8,
OT_MOFFS_FULL,
/* Use an immediate of 1, as for SHR R/M, 1 */
OT_CONST1,
/* Use CL, as for SHR R/M, CL */
OT_REGCL,
/*
 * Instruction-Block for one byte long instructions, used by INC/DEC/PUSH/POP/XCHG,
 * REG is extracted from the value of opcode
 * Use an 8bit register
 */
OT_IB_RB,
/* Use a 16/32/64bit register */
OT_IB_R_FULL,
/* Use [(r)SI] as INDIRECTION, for repeatable instructions */
OT_REGI_ESI,
/* Use [(r)DI] as INDIRECTION, for repeatable instructions */
OT_REGI_EDI,
/* Use [(r)BX + AL] as INDIRECTION, used by XLAT only */
OT_REGI_EBXAL,
/* Use [(r)AX] as INDIRECTION, used by AMD's SVM instructions */
OT_REGI_EAX,
/* Use DX, as for OUTS DX, BYTE [SI] */
OT_REGDX,
/* Use ECX in INVLPGA instruction */
OT_REGECX,
/* FPU registers: */
OT_FPU_SI, /* ST(i) */
OT_FPU_SSI, /* ST(0), ST(i) */
OT_FPU_SIS, /* ST(i), ST(0) */
/* MMX registers: */
OT_MM,
/* Extract the MMX register from the RM bits this time (used when the REG bits are used for opcode extension) */
OT_MM_RM,
/* ModR/M points to 32 bits MMX variable */
OT_MM32,
/* ModR/M points to 64 bits MMX variable */
OT_MM64,
/* SSE registers: */
OT_XMM,
/* Extract the SSE register from the RM bits this time (used when the REG bits are used for opcode extension) */
OT_XMM_RM,
/* ModR/M points to 16 bits SSE variable */
OT_XMM16,
/* ModR/M points to 32 bits SSE variable */
OT_XMM32,
/* ModR/M points to 64 bits SSE variable */
OT_XMM64,
/* ModR/M points to 128 bits SSE variable */
OT_XMM128,
/* Implied XMM0 register as operand, used in SSE4. */
OT_REGXMM0,
/* AVX operands: */
/* ModR/M for 32 bits. */
OT_RM32,
/* Reg32/Reg64 (prefix width) or Mem8. */
OT_REG32_64_M8,
/* Reg32/Reg64 (prefix width) or Mem16. */
OT_REG32_64_M16,
/* Reg32/Reg64 depends on prefix width only. */
OT_WREG32_64,
/* RM32/RM64 depends on prefix width only. */
OT_WRM32_64,
/* XMM or Mem32/Mem64 depends on prefix width only. */
OT_WXMM32_64,
/* XMM is encoded in VEX.VVVV. */
OT_VXMM,
/* XMM is encoded in the high nibble of an immediate byte. */
OT_XMM_IMM,
/* YMM/XMM is dependent on VEX.L. */
OT_YXMM,
/* YMM/XMM (depends on prefix length) is encoded in the high nibble of an immediate byte. */
OT_YXMM_IMM,
/* YMM is encoded in reg. */
OT_YMM,
/* YMM or Mem256. */
OT_YMM256,
/* YMM is encoded in VEX.VVVV. */
OT_VYMM,
/* YMM/XMM is dependent on VEX.L, and encoded in VEX.VVVV. */
OT_VYXMM,
/* YMM/XMM or Mem64/Mem256 is dependent on VEX.L. */
OT_YXMM64_256,
/* YMM/XMM or Mem128/Mem256 is dependent on VEX.L. */
OT_YXMM128_256,
/* XMM or Mem64/Mem128 is dependent on VEX.L. */
OT_LXMM64_128,
/* Mem128/Mem256 is dependent on VEX.L. */
OT_LMEM128_256
} _OpType;
/*
 * Flags for instruction:
 * Each flag occupies one bit of a 32 bits wide flags value (bit 0 up to bit 31).
 */

/* Empty flags indicator: */
#define INST_FLAGS_NONE (0)
/* The instruction we are going to decode requires ModR/M encoding. */
#define INST_MODRM_REQUIRED (1)
/* Special treatment for instructions which are in the divided-category but still need the whole byte for ModR/M... */
#define INST_NOT_DIVIDED (1 << 1)
/*
 * Used explicitly in repeatable instructions,
 * which need a suffix letter in their mnemonic to specify operation-size (depends on operands).
 */
#define INST_16BITS (1 << 2)
/* If the opcode is supported by 80286 and upper models (16/32 bits). */
#define INST_32BITS (1 << 3)

/*
 * Prefix flags (6 types: lock/rep, seg override, addr-size, oper-size, REX, VEX)
 * There are several specific instructions that can follow the LOCK prefix,
 * note that they must be using a memory operand form, otherwise they generate an exception.
 */
#define INST_PRE_LOCK (1 << 4)
/* REPNZ prefix for string instructions only - means an instruction can follow it. */
#define INST_PRE_REPNZ (1 << 5)
/* REP prefix for string instructions only - means an instruction can follow it. */
#define INST_PRE_REP (1 << 6)
/* CS override prefix. */
#define INST_PRE_CS (1 << 7)
/* SS override prefix. */
#define INST_PRE_SS (1 << 8)
/* DS override prefix. */
#define INST_PRE_DS (1 << 9)
/* ES override prefix. */
#define INST_PRE_ES (1 << 10)
/* FS override prefix. Funky Segment :) */
#define INST_PRE_FS (1 << 11)
/* GS override prefix. Groovy Segment, of course not, duh ! */
#define INST_PRE_GS (1 << 12)
/* Switch operand size from 32 to 16 and vice versa. */
#define INST_PRE_OP_SIZE (1 << 13)
/* Switch address size from 32 to 16 and vice versa. */
#define INST_PRE_ADDR_SIZE (1 << 14)

/* Native instructions which need a suffix letter to indicate their operation-size (and don't depend on operands). */
#define INST_NATIVE (1 << 15)
/* Use extended mnemonic, means it's an _InstInfoEx structure, which contains another mnemonic for 32 bits specifically. */
#define INST_USE_EXMNEMONIC (1 << 16)
/* Use third operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */
#define INST_USE_OP3 (1 << 17)
/* Use fourth operand, means it's an _InstInfoEx structure, which contains another operand for special instructions. */
#define INST_USE_OP4 (1 << 18)
/* The instruction's mnemonic depends on the mod value of the ModR/M byte (mod=11, mod!=11). */
#define INST_MNEMONIC_MODRM_BASED (1 << 19)
/* The instruction uses a ModR/M byte in which the MOD must be 11 (for register operands only). */
#define INST_MODRR_REQUIRED (1 << 20)
/* The way 3DNow! instructions are built, we have to handle their locating specially. Suffix imm8 tells which instruction it is. */
#define INST_3DNOW_FETCH (1 << 21)
/* The instruction needs two suffixes, one for the comparison type (imm8) and the second for its operation size indication (second mnemonic). */
#define INST_PSEUDO_OPCODE (1 << 22)
/* Invalid instruction at 64 bits decoding mode. */
#define INST_INVALID_64BITS (1 << 23)
/* Specific instruction can be promoted to 64 bits (without REX, it is promoted automatically). */
#define INST_64BITS (1 << 24)
/* Indicates the instruction must be REX prefixed in order to use 64 bits operands. */
#define INST_PRE_REX (1 << 25)
/* Third mnemonic is set. */
#define INST_USE_EXMNEMONIC2 (1 << 26)
/* Instruction is only valid in 64 bits decoding mode. */
#define INST_64BITS_FETCH (1 << 27)
/* Forces that the ModRM-REG/Opcode field will be 0. (For EXTRQ). */
#define INST_FORCE_REG0 (1 << 28)
/* Indicates that instruction is encoded with a VEX prefix. */
#define INST_PRE_VEX (1 << 29)
/* Indicates that the instruction is encoded with a ModRM byte (REG field specifically). */
#define INST_MODRM_INCLUDED (1 << 30)
/*
 * Indicates that the first (/destination) operand of the instruction is writable.
 * This is bit 31, the sign bit of a 32 bits int: (1 << 31) overflows a signed int, which is
 * undefined behavior in C, and it evaluates to a negative value that sign-extends when widened.
 * Shifting an unsigned 1 keeps the very same bit pattern and makes the constant well defined.
 */
#define INST_DST_WR (1U << 31)

/* Combined masks of the prefix flags above: */
#define INST_PRE_REPS (INST_PRE_REPNZ | INST_PRE_REP)
#define INST_PRE_LOKREP_MASK (INST_PRE_LOCK | INST_PRE_REPNZ | INST_PRE_REP)
#define INST_PRE_SEGOVRD_MASK32 (INST_PRE_CS | INST_PRE_SS | INST_PRE_DS | INST_PRE_ES)
#define INST_PRE_SEGOVRD_MASK64 (INST_PRE_FS | INST_PRE_GS)
#define INST_PRE_SEGOVRD_MASK (INST_PRE_SEGOVRD_MASK32 | INST_PRE_SEGOVRD_MASK64)
/*
 * Extended flags for VEX.
 * _InstInfoEx.flagsEx is documented as the 8 bits wide home of extended flags,
 * and all of the values below fit in 8 bits.
 */

/* VEX.L might be encoded for this instruction. */
#define INST_VEX_L (1 << 0)
/* VEX.W might be encoded for this instruction. */
#define INST_VEX_W (1 << 1)
/* The VEX.W bit selects the mnemonic of the instruction. */
#define INST_MNEMONIC_VEXW_BASED (1 << 2)
/* The VEX.L bit selects the mnemonic of the instruction. */
#define INST_MNEMONIC_VEXL_BASED (1 << 3)
/* The instruction has to be encoded with VEX.L, otherwise it's undefined. */
#define INST_FORCE_VEXL (1 << 4)
/*
 * The instruction is selected by the MOD field of the ModRM byte.
 * (MOD==11: got the right instruction, else skip +4 in prefixed table for the correct instruction).
 */
#define INST_MODRR_BASED (1 << 5)
/* The VVVV field of the VEX prefix is not used by the instruction; if it is used anyway, it's undecodable. */
#define INST_VEX_V_UNUSED (1 << 6)
/*
 * Marks a privileged (Ring 0) instruction.
 * This is the most significant bit of the 16 bits opcodeId field, and should be checked on that field.
 */
#define OPCODE_ID_PRIVILEGED ((uint16_t)(1U << 15))
/*
 * Tells which operand slot is being decoded:
 * destination (1st), source (2nd), op3 (3rd) or op4 (4th).
 * Used to set the operands' fields in the _DInst structure!
 */
typedef enum {
	ONT_NONE = -1, /* No operand. */
	ONT_1 = 0,     /* Destination (1st). */
	ONT_2 = 1,     /* Source (2nd). */
	ONT_3 = 2,     /* op3 (3rd). */
	ONT_4 = 3      /* op4 (4th). */
} _OperandNumberType;
/*
 * CPU flags that instructions modify, test or undefine, in compacted (8 bits) form.
 * CF, PF, AF, ZF and SF map 1:1 to their EFLAGS bits.
 */
#define D_COMPACT_CF 0x01 /* Carry */
#define D_COMPACT_PF 0x04 /* Parity */
#define D_COMPACT_AF 0x10 /* Auxiliary */
#define D_COMPACT_ZF 0x40 /* Zero */
#define D_COMPACT_SF 0x80 /* Sign */
/* The remaining flags are stored in the free bits, hence they have to be translated to EFLAGS. */
#define D_COMPACT_IF 0x02 /* Interrupt */
#define D_COMPACT_DF 0x08 /* Direction */
#define D_COMPACT_OF 0x20 /* Overflow */
/* The mask of the flags that are already compatible with EFLAGS. */
#define D_COMPACT_SAME_FLAGS \
	(D_COMPACT_CF | D_COMPACT_PF | D_COMPACT_AF | D_COMPACT_ZF | D_COMPACT_SF)
/*
 * Most of the information that instructions use repeats itself, so in order to
 * save more space when storing the DB statically, it is kept in another level of shared info.
 *
 * Holds the info about the instruction: source/dest types, meta and flags.
 * _InstInfo points to a table of _InstSharedInfo.
 */
typedef struct {
	/* An index into FlagsTables. */
	uint8_t flagsIndex;
	/* OpType of the 's' operand. */
	uint8_t s;
	/* OpType of the 'd' operand. */
	uint8_t d;
	/* Hi 5 bits = Instruction set class | Lo 3 bits = flow control flags. */
	uint8_t meta;
	/*
	 * The three masks below are the CPU flags that the instruction changes.
	 * The flags are compacted so an 8 bits representation is enough.
	 * They will be expanded in runtime to be compatible to EFLAGS.
	 */
	uint8_t modifiedFlagsMask;
	uint8_t testedFlagsMask;
	uint8_t undefinedFlagsMask;
} _InstSharedInfo;
/*
 * An entry of the instructions DB - NOT the disassembled result code!
 * This is the BASE structure, there are extensions to this structure below.
 */
typedef struct {
	/* An index into the SharedInfoTable. */
	uint16_t sharedIndex;
	/* The opcodeId is really a byte-offset into the mnemonics table. MSB is a privileged indication. */
	uint16_t opcodeId;
} _InstInfo;
/*
 * Only a few instructions need a second mnemonic for 32 bits, or a third one for 64 bits.
 * Therefore sometimes the second mnemonic is empty but not the third.
 * In all decoding modes the first mnemonic is the default.
 * A flag will indicate it uses another mnemonic.
 *
 * There are a couple of (SSE4) instructions in the whole DB which need both op3 and a 3rd mnemonic for 64bits,
 * therefore all of the extra info is kept together in this single extended structure.
 * There are a few instructions (SHLD/SHRD/IMUL and SSE too) which use a third operand (or a fourth).
 * A flag will indicate it uses a third/fourth operand.
 */
typedef struct {
	/* Base structure (doesn't get accessed directly from code). */
	_InstInfo BASE;

	/* Extended starts here. */

	/* 8 bits are enough, in the future we might make it a bigger integer. */
	uint8_t flagsEx;
	/* OpType of the third operand. */
	uint8_t op3;
	/* OpType of the fourth operand. */
	uint8_t op4;
	/* The second mnemonic. */
	uint16_t opcodeId2;
	/* The third mnemonic. */
	uint16_t opcodeId3;
} _InstInfoEx;
/*
 * Trie data structure node type.
 * Every value is spelled out, as the relative order of the values is meaningful.
 */
typedef enum {
	/* Not exists. */
	INT_NOTEXISTS = 0,
	/* It's an instruction info. */
	INT_INFO = 1,
	INT_INFOEX = 2,
	INT_LIST_GROUP = 3,
	INT_LIST_FULL = 4,
	INT_LIST_DIVIDED = 5,
	INT_LIST_PREFIXED = 6
} _InstNodeType;
/*
 * Used to check instType < INT_INFOS, which means we got an inst-info, because it has to be only one of them.
 * This is the first of the INT_LIST_* node types, so INT_INFO and INT_INFOEX compare below it.
 * Note that INT_NOTEXISTS (0) compares below it as well.
 * NOTE(review): callers presumably rule out INT_NOTEXISTS before this check - confirm.
 */
#define INT_INFOS (INT_LIST_GROUP)
/*
 * Instruction node is treated as { int index:13; int type:3; }
 * The two fields add up to the 16 bits of the node.
 * NOTE(review): which bits hold the index and which the type isn't visible in this header - confirm in the lookup code.
 */
typedef uint16_t _InstNode;
/*
 * Looks up the instruction info for the code described by ci, taking the prefix state ps.
 * NOTE(review): what is returned when nothing matches, and whether ci/ps get modified,
 * isn't visible in this header - confirm against the implementation.
 */
_InstInfo* inst_lookup(_CodeInfo* ci, _PrefixState* ps);
/*
 * Looks up the instruction info of a 3DNow! instruction for the code described by ci.
 * NOTE(review): presumably tied to INST_3DNOW_FETCH (the suffix imm8 tells which instruction it is) - confirm.
 */
_InstInfo* inst_lookup_3dnow(_CodeInfo* ci);
#endif /* INSTRUCTIONS_H */