/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains common definitions used by both the disassembler and the table
 *  generator.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/

/*
 * This header file provides those definitions that need to be shared between
 * the decoder and the table generator in a C-friendly manner.
 */

22#ifndef X86DISASSEMBLERDECODERCOMMON_H
23#define X86DISASSEMBLERDECODERCOMMON_H
24
25#include "llvm/Support/DataTypes.h"
26
/*
 * Names of the decode tables.  Each name is provided twice: the *_SYM macro
 * expands to a bare identifier and the matching *_STR macro expands to the
 * same spelling as a string literal.  The two forms of a pair must be kept
 * in sync by hand.
 */
#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM      x86DisassemblerContexts
#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
#define THREEBYTEA6_SYM   x86DisassemblerThreeByteA6Opcodes
#define THREEBYTEA7_SYM   x86DisassemblerThreeByteA7Opcodes

#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR      "x86DisassemblerContexts"
#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
#define THREEBYTEA6_STR   "x86DisassemblerThreeByteA6Opcodes"
#define THREEBYTEA7_STR   "x86DisassemblerThreeByteA7Opcodes"

/*
 * Attributes of an instruction that must be known before the opcode can be
 * processed correctly.  Most of these indicate the presence of particular
 * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
 *
 * Every attribute other than ATTR_NONE occupies its own bit.  Note that
 * ATTR_max is not a count of attributes: it is the enumerator that follows
 * ATTR_VEXL and therefore has the value 0x41.
 */
#define ATTRIBUTE_BITS          \
  ENUM_ENTRY(ATTR_NONE,   0x00) \
  ENUM_ENTRY(ATTR_64BIT,  0x01) \
  ENUM_ENTRY(ATTR_XS,     0x02) \
  ENUM_ENTRY(ATTR_XD,     0x04) \
  ENUM_ENTRY(ATTR_REXW,   0x08) \
  ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
  ENUM_ENTRY(ATTR_VEX,    0x20) \
  ENUM_ENTRY(ATTR_VEXL,   0x40)

/* Expand each (name, value) pair into an explicitly valued enumerator. */
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
  ATTRIBUTE_BITS
  ATTR_max
};
#undef ENUM_ENTRY

/*
 * Combinations of the above attributes that are relevant to instruction
 * decode.  Although other combinations are possible, they can be reduced to
 * these without affecting the ultimately decoded instruction.
 *
 * Each entry carries a name, a rank, and a rationale string.  The ENUM_ENTRY
 * definition used in this header keeps only the name, so the numeric value of
 * each IC_* constant is its position in the list; do not reorder entries.
 */

/*           Class name           Rank  Rationale for rank assignment         */
#define INSTRUCTION_CONTEXTS                                                   \
  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
                                        "64-bit mode but no more")             \
  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
                                        "operands change width")               \
  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_XD_OPSIZE,          3,  "requires an OPSIZE prefix, so "       \
                                        "operands change width")               \
  ENUM_ENTRY(IC_XS_OPSIZE,          3,  "requires an OPSIZE prefix, so "       \
                                        "operands change width")               \
  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
                                        "change width; overrides IC_OPSIZE")   \
  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
                                        "secondary")                           \
  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
  ENUM_ENTRY(IC_64BIT_XD_OPSIZE,    3,  "Just as meaningful as IC_XD_OPSIZE")  \
  ENUM_ENTRY(IC_64BIT_XS_OPSIZE,    3,  "Just as meaningful as IC_XS_OPSIZE")  \
  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
                                        "opcode")                              \
  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
                                        "IC_64BIT_REXW_XS")                    \
  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
                                        "else because this changes most "      \
                                        "operands' meaning")                   \
  ENUM_ENTRY(IC_VEX,                1,  "requires a VEX prefix")               \
  ENUM_ENTRY(IC_VEX_XS,             2,  "requires VEX and the XS prefix")      \
  ENUM_ENTRY(IC_VEX_XD,             2,  "requires VEX and the XD prefix")      \
  ENUM_ENTRY(IC_VEX_OPSIZE,         2,  "requires VEX and the OpSize prefix")  \
  ENUM_ENTRY(IC_VEX_W,              3,  "requires VEX and the W prefix")       \
  ENUM_ENTRY(IC_VEX_W_XS,           4,  "requires VEX, W, and XS prefix")      \
  ENUM_ENTRY(IC_VEX_W_XD,           4,  "requires VEX, W, and XD prefix")      \
  ENUM_ENTRY(IC_VEX_W_OPSIZE,       4,  "requires VEX, W, and OpSize")         \
  ENUM_ENTRY(IC_VEX_L,              3,  "requires VEX and the L prefix")       \
  ENUM_ENTRY(IC_VEX_L_XS,           4,  "requires VEX and the L and XS prefix")\
  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XD prefix")\
  ENUM_ENTRY(IC_VEX_L_OPSIZE,       4,  "requires VEX, L, and OpSize")


/* Keep only the class name; rank (r) and rationale (d) are ignored here. */
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
  INSTRUCTION_CONTEXTS
  IC_max
} InstructionContext;
#undef ENUM_ENTRY

/*
 * Opcode types, which determine which decode table to use, both in the Intel
 * manual and also for the decoder.  The values are assigned explicitly, one
 * per table, from 0 (ONEBYTE) through 5 (THREEBYTE_A7).
 */
typedef enum {
  ONEBYTE       = 0,
  TWOBYTE       = 1,
  THREEBYTE_38  = 2,
  THREEBYTE_3A  = 3,
  THREEBYTE_A6  = 4,
  THREEBYTE_A7  = 5
} OpcodeType;

/*
 * The following structs are used for the hierarchical decode table.  After
 * determining the instruction's class (i.e., which IC_* constant applies to
 * it), the decoder reads the opcode.  Some instructions require specific
 * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
 *
 * If the ModR/M byte does not influence which instruction is decoded, the
 * decision is of type MODRM_ONEENTRY (see ModRMDecisionType).
 * NOTE(review): an earlier wording of this comment referred to a "required"
 * flag being left unset; no field of that name is visible in this header.
 */

/*
 * InstrUID - 16-bit unsigned instruction identifier.
 * NOTE(review): presumably an index into the INSTRUCTIONS_SYM table; confirm
 * against the decoder.
 */
typedef uint16_t InstrUID;

/*
 * ModRMDecisionType - describes the type of ModR/M decision, allowing the
 * consumer to determine the number of entries in it.
 *
 * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
 *                  instruction is the same.
 * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
 *                  corresponds to one instruction; otherwise, it corresponds to
 *                  a different instruction.
 * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
 *                  to a different instruction.
 *
 * MODRM_max is the number of decision types listed above.
 */

#define MODRMTYPES            \
  ENUM_ENTRY(MODRM_ONEENTRY)  \
  ENUM_ENTRY(MODRM_SPLITRM)   \
  ENUM_ENTRY(MODRM_FULL)

/* Expand each listed name into a consecutively numbered enumerator. */
#define ENUM_ENTRY(n) n,
typedef enum {
  MODRMTYPES
  MODRM_max
} ModRMDecisionType;
#undef ENUM_ENTRY

174/*
175 * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
176 *  instruction each possible value of the ModR/M byte corresponds to.  Once
177 *  this information is known, we have narrowed down to a single instruction.
178 */
179struct ModRMDecision {
180  uint8_t     modrm_type;
181
182  /* The macro below must be defined wherever this file is included. */
183  INSTRUCTION_IDS
184};
185
186/*
187 * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
188 *   given a particular opcode.
189 */
190struct OpcodeDecision {
191  struct ModRMDecision modRMDecisions[256];
192};
193
194/*
195 * ContextDecision - Specifies which opcode->instruction tables to look at given
196 *   a particular context (set of attributes).  Since there are many possible
197 *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
198 *   applies given a specific set of attributes.  Hence there are only IC_max
199 *   entries in this table, rather than 2^(ATTR_max).
200 */
201struct ContextDecision {
202  struct OpcodeDecision opcodeDecisions[IC_max];
203};
204
/*
 * Physical encodings of instruction operands.
 *
 * Each entry pairs an ENCODING_* name with a description string.  The
 * ENUM_ENTRY definition used in this header keeps only the name, so the value
 * of each constant is its position in the list; do not reorder entries.
 */

#define ENCODINGS                                                              \
  ENUM_ENTRY(ENCODING_NONE,   "")                                              \
  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
  ENUM_ENTRY(ENCODING_VVVV,   "Register operand in VEX.vvvv byte.")            \
  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
                              "the opcode byte")                               \
  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
                              "opcode byte")                                   \
                                                                               \
  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
  ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
                              "opcode byte")                                   \
  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
                              "in type")

/* Keep only the encoding name; the description (d) is ignored here. */
#define ENUM_ENTRY(n, d) n,
typedef enum {
  ENCODINGS
  ENCODING_max
} OperandEncoding;
#undef ENUM_ENTRY

/*
 * Semantic interpretations of instruction operands.
 *
 * Each entry pairs a TYPE_* name with a description string.  A description
 * that gives only a size continues the wording of the nearest fuller
 * description above it.  The ENUM_ENTRY definition used in this header keeps
 * only the name, so the value of each constant is its position in the list;
 * do not reorder entries.
 */

#define TYPES                                                                  \
  ENUM_ENTRY(TYPE_NONE,       "")                                              \
  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
  ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
  ENUM_ENTRY(TYPE_M256,       "32-byte (AVX)")                                 \
  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
                              "base)")                                         \
  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEEE754 memory floating-point operand")  \
  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
                              "floating-point instructions")                   \
  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
  ENUM_ENTRY(TYPE_XMM256,     "32-byte")                                       \
  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
  ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
                                                                               \
  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
  ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
  ENUM_ENTRY(TYPE_M512,       "512-byte FPU/MMX/XMM/MXCSR state")

/* Keep only the type name; the description (d) is ignored here. */
#define ENUM_ENTRY(n, d) n,
typedef enum {
  TYPES
  TYPE_max
} OperandType;
#undef ENUM_ENTRY

333/*
334 * OperandSpecifier - The specification for how to extract and interpret one
335 *   operand.
336 */
337struct OperandSpecifier {
338  OperandEncoding  encoding;
339  OperandType      type;
340};
341
/*
 * Indicates where the opcode modifier (if any) is to be found.  Extended
 * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
 *
 * MODIFIER_max is the number of modifier types listed below.
 */

#define MODIFIER_TYPES        \
  ENUM_ENTRY(MODIFIER_NONE)   \
  ENUM_ENTRY(MODIFIER_OPCODE) \
  ENUM_ENTRY(MODIFIER_MODRM)

/* Expand each listed name into a consecutively numbered enumerator. */
#define ENUM_ENTRY(n) n,
typedef enum {
  MODIFIER_TYPES
  MODIFIER_max
} ModifierType;
#undef ENUM_ENTRY

359#define X86_MAX_OPERANDS 5
360
361/*
362 * The specification for how to extract and interpret a full instruction and
363 * its operands.
364 */
365struct InstructionSpecifier {
366  ModifierType modifierType;
367  uint8_t modifierBase;
368  struct OperandSpecifier operands[X86_MAX_OPERANDS];
369
370  /* The macro below must be defined wherever this file is included. */
371  INSTRUCTION_SPECIFIER_FIELDS
372};
373
/*
 * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
 * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
 * respectively.
 * NOTE(review): "IA-32e" for MODE_32BIT was probably meant to be IA-32
 * (32-bit protected mode); confirm before relying on this wording.
 *
 * The enumerators take the default values 0, 1 and 2 in the order listed.
 */
typedef enum {
  MODE_16BIT,
  MODE_32BIT,
  MODE_64BIT
} DisassemblerMode;

385#endif
386