1/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
2 *
3 *                     The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===*
9 *
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
13 *
14 *===----------------------------------------------------------------------===*/
15
16/* Capstone Disassembly Engine */
17/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
18
19#ifdef CAPSTONE_HAS_X86
20
21#include <stdarg.h>   /* for va_*()       */
22#if defined(CAPSTONE_HAS_OSXKERNEL)
23#include <libkern/libkern.h>
24#else
25#include <stdlib.h>   /* for exit()       */
26#endif
27
28#include "../../cs_priv.h"
29#include "../../utils.h"
30
31#include "X86DisassemblerDecoder.h"
32
/// Specifies whether a ModR/M byte is needed and (if so) which
/// instruction each possible value of the ModR/M byte corresponds to.  Once
/// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
	/// How the ModR/M byte selects among instructions.  decode() switches on
	/// this value (MODRM_ONEENTRY, MODRM_SPLITRM, MODRM_SPLITREG,
	/// MODRM_SPLITMISC, MODRM_FULL).
	uint8_t modrm_type;
	/// Index of this decision's first entry in modRMTable.  decode() adds an
	/// offset derived from the ModR/M byte, depending on modrm_type.
	uint16_t instructionIDs;
};
40
/// Specifies which set of ModR/M->instruction tables to look at
/// given a particular opcode.
struct OpcodeDecision {
	/// One decision per possible value of the opcode byte; modRMRequired()
	/// and decode() index this array directly with the opcode.
	struct ModRMDecision modRMDecisions[256];
};
46
/// Specifies which opcode->instruction tables to look at given
/// a particular context (set of attributes).  Since there are many possible
/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
/// applies given a specific set of attributes.  Hence there are only IC_max
/// entries in this table, rather than 2^(ATTR_max).
struct ContextDecision {
	/// One opcode table per instruction context (IC_max entries).
	struct OpcodeDecision opcodeDecisions[IC_max];
};
55
56#ifdef CAPSTONE_X86_REDUCE
57#include "X86GenDisassemblerTables_reduce.inc"
58#else
59#include "X86GenDisassemblerTables.inc"
60#endif
61
62//#define GET_INSTRINFO_ENUM
63#define GET_INSTRINFO_MC_DESC
64#ifdef CAPSTONE_X86_REDUCE
65#include "X86GenInstrInfo_reduce.inc"
66#else
67#include "X86GenInstrInfo.inc"
68#endif
69
70/*
71 * contextForAttrs - Client for the instruction context table.  Takes a set of
72 *   attributes and returns the appropriate decode context.
73 *
74 * @param attrMask  - Attributes, from the enumeration attributeBits.
75 * @return          - The InstructionContext to use when looking up an
76 *                    an instruction with these attributes.
77 */
78static InstructionContext contextForAttrs(uint16_t attrMask)
79{
80	return CONTEXTS_SYM[attrMask];
81}
82
83/*
84 * modRMRequired - Reads the appropriate instruction table to determine whether
85 *   the ModR/M byte is required to decode a particular instruction.
86 *
87 * @param type        - The opcode type (i.e., how many bytes it has).
88 * @param insnContext - The context for the instruction, as returned by
89 *                      contextForAttrs.
90 * @param opcode      - The last byte of the instruction's opcode, not counting
91 *                      ModR/M extensions and escapes.
92 * @return            - true if the ModR/M byte is required, false otherwise.
93 */
94static int modRMRequired(OpcodeType type,
95		InstructionContext insnContext,
96		uint16_t opcode)
97{
98	const struct OpcodeDecision *decision = NULL;
99	const uint8_t *indextable = NULL;
100	uint8_t index;
101
102	switch (type) {
103		default:
104		case ONEBYTE:
105			decision = ONEBYTE_SYM;
106			indextable = index_x86DisassemblerOneByteOpcodes;
107			break;
108		case TWOBYTE:
109			decision = TWOBYTE_SYM;
110			indextable = index_x86DisassemblerTwoByteOpcodes;
111			break;
112		case THREEBYTE_38:
113			decision = THREEBYTE38_SYM;
114			indextable = index_x86DisassemblerThreeByte38Opcodes;
115			break;
116		case THREEBYTE_3A:
117			decision = THREEBYTE3A_SYM;
118			indextable = index_x86DisassemblerThreeByte3AOpcodes;
119			break;
120#ifndef CAPSTONE_X86_REDUCE
121		case XOP8_MAP:
122			decision = XOP8_MAP_SYM;
123			indextable = index_x86DisassemblerXOP8Opcodes;
124			break;
125		case XOP9_MAP:
126			decision = XOP9_MAP_SYM;
127			indextable = index_x86DisassemblerXOP9Opcodes;
128			break;
129		case XOPA_MAP:
130			decision = XOPA_MAP_SYM;
131			indextable = index_x86DisassemblerXOPAOpcodes;
132			break;
133		case T3DNOW_MAP:
134			// 3DNow instructions always have ModRM byte
135			return true;
136#endif
137	}
138
139	index = indextable[insnContext];
140	if (index)
141		return decision[index - 1].modRMDecisions[opcode].modrm_type != MODRM_ONEENTRY;
142	else
143		return false;
144}
145
146/*
147 * decode - Reads the appropriate instruction table to obtain the unique ID of
148 *   an instruction.
149 *
150 * @param type        - See modRMRequired().
151 * @param insnContext - See modRMRequired().
152 * @param opcode      - See modRMRequired().
153 * @param modRM       - The ModR/M byte if required, or any value if not.
154 * @return            - The UID of the instruction, or 0 on failure.
155 */
156static InstrUID decode(OpcodeType type,
157		InstructionContext insnContext,
158		uint8_t opcode,
159		uint8_t modRM)
160{
161	const struct ModRMDecision *dec = NULL;
162	const uint8_t *indextable = NULL;
163	uint8_t index;
164
165	switch (type) {
166		default:
167		case ONEBYTE:
168			indextable = index_x86DisassemblerOneByteOpcodes;
169			index = indextable[insnContext];
170			if (index)
171				dec = &ONEBYTE_SYM[index - 1].modRMDecisions[opcode];
172			else
173				dec = &emptyTable.modRMDecisions[opcode];
174			break;
175		case TWOBYTE:
176			indextable = index_x86DisassemblerTwoByteOpcodes;
177			index = indextable[insnContext];
178			if (index)
179				dec = &TWOBYTE_SYM[index - 1].modRMDecisions[opcode];
180			else
181				dec = &emptyTable.modRMDecisions[opcode];
182			break;
183		case THREEBYTE_38:
184			indextable = index_x86DisassemblerThreeByte38Opcodes;
185			index = indextable[insnContext];
186			if (index)
187				dec = &THREEBYTE38_SYM[index - 1].modRMDecisions[opcode];
188			else
189				dec = &emptyTable.modRMDecisions[opcode];
190			break;
191		case THREEBYTE_3A:
192			indextable = index_x86DisassemblerThreeByte3AOpcodes;
193			index = indextable[insnContext];
194			if (index)
195				dec = &THREEBYTE3A_SYM[index - 1].modRMDecisions[opcode];
196			else
197				dec = &emptyTable.modRMDecisions[opcode];
198			break;
199#ifndef CAPSTONE_X86_REDUCE
200		case XOP8_MAP:
201			indextable = index_x86DisassemblerXOP8Opcodes;
202			index = indextable[insnContext];
203			if (index)
204				dec = &XOP8_MAP_SYM[index - 1].modRMDecisions[opcode];
205			else
206				dec = &emptyTable.modRMDecisions[opcode];
207			break;
208		case XOP9_MAP:
209			indextable = index_x86DisassemblerXOP9Opcodes;
210			index = indextable[insnContext];
211			if (index)
212				dec = &XOP9_MAP_SYM[index - 1].modRMDecisions[opcode];
213			else
214				dec = &emptyTable.modRMDecisions[opcode];
215			break;
216		case XOPA_MAP:
217			indextable = index_x86DisassemblerXOPAOpcodes;
218			index = indextable[insnContext];
219			if (index)
220				dec = &XOPA_MAP_SYM[index - 1].modRMDecisions[opcode];
221			else
222				dec = &emptyTable.modRMDecisions[opcode];
223			break;
224		case T3DNOW_MAP:
225			indextable = index_x86DisassemblerT3DNOWOpcodes;
226			index = indextable[insnContext];
227			if (index)
228				dec = &T3DNOW_MAP_SYM[index - 1].modRMDecisions[opcode];
229			else
230				dec = &emptyTable.modRMDecisions[opcode];
231			break;
232#endif
233	}
234
235	switch (dec->modrm_type) {
236		default:
237			//debug("Corrupt table!  Unknown modrm_type");
238			return 0;
239		case MODRM_ONEENTRY:
240			return modRMTable[dec->instructionIDs];
241		case MODRM_SPLITRM:
242			if (modFromModRM(modRM) == 0x3)
243				return modRMTable[dec->instructionIDs+1];
244			return modRMTable[dec->instructionIDs];
245		case MODRM_SPLITREG:
246			if (modFromModRM(modRM) == 0x3)
247				return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
248			return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
249		case MODRM_SPLITMISC:
250			if (modFromModRM(modRM) == 0x3)
251				return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
252			return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
253		case MODRM_FULL:
254			return modRMTable[dec->instructionIDs+modRM];
255	}
256}
257
258/*
259 * specifierForUID - Given a UID, returns the name and operand specification for
260 *   that instruction.
261 *
262 * @param uid - The unique ID for the instruction.  This should be returned by
263 *              decode(); specifierForUID will not check bounds.
264 * @return    - A pointer to the specification for that instruction.
265 */
266static const struct InstructionSpecifier *specifierForUID(InstrUID uid)
267{
268	return &INSTRUCTIONS_SYM[uid];
269}
270
271/*
272 * consumeByte - Uses the reader function provided by the user to consume one
273 *   byte from the instruction's memory and advance the cursor.
274 *
275 * @param insn  - The instruction with the reader function to use.  The cursor
276 *                for this instruction is advanced.
277 * @param byte  - A pointer to a pre-allocated memory buffer to be populated
278 *                with the data read.
279 * @return      - 0 if the read was successful; nonzero otherwise.
280 */
281static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
282{
283	int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
284
285	if (!ret)
286		++(insn->readerCursor);
287
288	return ret;
289}
290
291/*
292 * lookAtByte - Like consumeByte, but does not advance the cursor.
293 *
294 * @param insn  - See consumeByte().
295 * @param byte  - See consumeByte().
296 * @return      - See consumeByte().
297 */
298static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
299{
300	return insn->reader(insn->readerArg, byte, insn->readerCursor);
301}
302
303static void unconsumeByte(struct InternalInstruction *insn)
304{
305	insn->readerCursor--;
306}
307
308#define CONSUME_FUNC(name, type)                                  \
309	static int name(struct InternalInstruction *insn, type *ptr) {  \
310		type combined = 0;                                            \
311		unsigned offset;                                              \
312		for (offset = 0; offset < sizeof(type); ++offset) {           \
313			uint8_t byte;                                               \
314			int ret = insn->reader(insn->readerArg,                     \
315					&byte,                               \
316					insn->readerCursor + offset);        \
317			if (ret)                                                    \
318			return ret;                                               \
319			combined = combined | (type)((uint64_t)byte << (offset * 8));     \
320		}                                                             \
321		*ptr = combined;                                              \
322		insn->readerCursor += sizeof(type);                           \
323		return 0;                                                     \
324	}
325
326/*
327 * consume* - Use the reader function provided by the user to consume data
328 *   values of various sizes from the instruction's memory and advance the
329 *   cursor appropriately.  These readers perform endian conversion.
330 *
331 * @param insn    - See consumeByte().
332 * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
333 *                  be populated with the data read.
334 * @return        - See consumeByte().
335 */
336CONSUME_FUNC(consumeInt8, int8_t)
337CONSUME_FUNC(consumeInt16, int16_t)
338CONSUME_FUNC(consumeInt32, int32_t)
339CONSUME_FUNC(consumeUInt16, uint16_t)
340CONSUME_FUNC(consumeUInt32, uint32_t)
341CONSUME_FUNC(consumeUInt64, uint64_t)
342
343/*
344 * setPrefixPresent - Marks that a particular prefix is present at a particular
345 *   location.
346 *
347 * @param insn      - The instruction to be marked as having the prefix.
348 * @param prefix    - The prefix that is present.
349 * @param location  - The location where the prefix is located (in the address
350 *                    space of the instruction's reader).
351 */
352static void setPrefixPresent(struct InternalInstruction *insn,
353		uint8_t prefix, uint64_t location)
354{
355	switch (prefix) {
356	case 0x26:
357		insn->isPrefix26 = true;
358		insn->prefix26 = location;
359		break;
360	case 0x2e:
361		insn->isPrefix2e = true;
362		insn->prefix2e = location;
363		break;
364	case 0x36:
365		insn->isPrefix36 = true;
366		insn->prefix36 = location;
367		break;
368	case 0x3e:
369		insn->isPrefix3e = true;
370		insn->prefix3e = location;
371		break;
372	case 0x64:
373		insn->isPrefix64 = true;
374		insn->prefix64 = location;
375		break;
376	case 0x65:
377		insn->isPrefix65 = true;
378		insn->prefix65 = location;
379		break;
380	case 0x66:
381		insn->isPrefix66 = true;
382		insn->prefix66 = location;
383		break;
384	case 0x67:
385		insn->isPrefix67 = true;
386		insn->prefix67 = location;
387		break;
388	case 0xf0:
389		insn->isPrefixf0 = true;
390		insn->prefixf0 = location;
391		break;
392	case 0xf2:
393		insn->isPrefixf2 = true;
394		insn->prefixf2 = location;
395		break;
396	case 0xf3:
397		insn->isPrefixf3 = true;
398		insn->prefixf3 = location;
399		break;
400	default:
401		break;
402	}
403}
404
405/*
406 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
407 *   present at a given location.
408 *
409 * @param insn      - The instruction to be queried.
410 * @param prefix    - The prefix.
411 * @param location  - The location to query.
412 * @return          - Whether the prefix is at that location.
413 */
414static bool isPrefixAtLocation(struct InternalInstruction *insn, uint8_t prefix,
415		uint64_t location)
416{
417	switch (prefix) {
418	case 0x26:
419		if (insn->isPrefix26 && insn->prefix26 == location)
420			return true;
421		break;
422	case 0x2e:
423		if (insn->isPrefix2e && insn->prefix2e == location)
424			return true;
425		break;
426	case 0x36:
427		if (insn->isPrefix36 && insn->prefix36 == location)
428			return true;
429		break;
430	case 0x3e:
431		if (insn->isPrefix3e && insn->prefix3e == location)
432			return true;
433		break;
434	case 0x64:
435		if (insn->isPrefix64 && insn->prefix64 == location)
436			return true;
437		break;
438	case 0x65:
439		if (insn->isPrefix65 && insn->prefix65 == location)
440			return true;
441		break;
442	case 0x66:
443		if (insn->isPrefix66 && insn->prefix66 == location)
444			return true;
445		break;
446	case 0x67:
447		if (insn->isPrefix67 && insn->prefix67 == location)
448			return true;
449		break;
450	case 0xf0:
451		if (insn->isPrefixf0 && insn->prefixf0 == location)
452			return true;
453		break;
454	case 0xf2:
455		if (insn->isPrefixf2 && insn->prefixf2 == location)
456			return true;
457		break;
458	case 0xf3:
459		if (insn->isPrefixf3 && insn->prefixf3 == location)
460			return true;
461		break;
462	default:
463		break;
464	}
465	return false;
466}
467
468/*
469 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
470 *   instruction as having them.  Also sets the instruction's default operand,
471 *   address, and other relevant data sizes to report operands correctly.
472 *
473 * @param insn  - The instruction whose prefixes are to be read.
474 * @return      - 0 if the instruction could be read until the end of the prefix
475 *                bytes, and no prefixes conflicted; nonzero otherwise.
476 */
477static int readPrefixes(struct InternalInstruction *insn)
478{
479	bool isPrefix = true;
480	uint64_t prefixLocation;
481	uint8_t byte = 0, nextByte;
482
483	bool hasAdSize = false;
484	bool hasOpSize = false;
485
486	while (isPrefix) {
487		if (insn->mode == MODE_64BIT) {
488			// eliminate consecutive redundant REX bytes in front
489			if (consumeByte(insn, &byte))
490				return -1;
491
492			if ((byte & 0xf0) == 0x40) {
493				while(true) {
494					if (lookAtByte(insn, &byte))	// out of input code
495						return -1;
496					if ((byte & 0xf0) == 0x40) {
497						// another REX prefix, but we only remember the last one
498						if (consumeByte(insn, &byte))
499							return -1;
500					} else
501						break;
502				}
503
504				// recover the last REX byte if next byte is not a legacy prefix
505				switch (byte) {
506					case 0xf2:  /* REPNE/REPNZ */
507					case 0xf3:  /* REP or REPE/REPZ */
508					case 0xf0:  /* LOCK */
509					case 0x2e:  /* CS segment override -OR- Branch not taken */
510					case 0x36:  /* SS segment override -OR- Branch taken */
511					case 0x3e:  /* DS segment override */
512					case 0x26:  /* ES segment override */
513					case 0x64:  /* FS segment override */
514					case 0x65:  /* GS segment override */
515					case 0x66:  /* Operand-size override */
516					case 0x67:  /* Address-size override */
517						break;
518					default:    /* Not a prefix byte */
519						unconsumeByte(insn);
520						break;
521				}
522			} else {
523				unconsumeByte(insn);
524			}
525		}
526
527		prefixLocation = insn->readerCursor;
528
529		/* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
530		if (consumeByte(insn, &byte))
531			return -1;
532
533		if (insn->readerCursor - 1 == insn->startLocation
534				&& (byte == 0xf2 || byte == 0xf3)) {
535
536			if (lookAtByte(insn, &nextByte))
537				return -1;
538
539			/*
540			 * If the byte is 0xf2 or 0xf3, and any of the following conditions are
541			 * met:
542			 * - it is followed by a LOCK (0xf0) prefix
543			 * - it is followed by an xchg instruction
544			 * then it should be disassembled as a xacquire/xrelease not repne/rep.
545			 */
546			if ((byte == 0xf2 || byte == 0xf3) &&
547					((nextByte == 0xf0) |
548					 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
549				insn->xAcquireRelease = true;
550			/*
551			 * Also if the byte is 0xf3, and the following condition is met:
552			 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
553			 *                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
554			 * then it should be disassembled as an xrelease not rep.
555			 */
556			if (byte == 0xf3 &&
557					(nextByte == 0x88 || nextByte == 0x89 ||
558					 nextByte == 0xc6 || nextByte == 0xc7))
559				insn->xAcquireRelease = true;
560
561			if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
562				if (consumeByte(insn, &nextByte))
563					return -1;
564				if (lookAtByte(insn, &nextByte))
565					return -1;
566				unconsumeByte(insn);
567			}
568		}
569
570		switch (byte) {
571			case 0xf2:  /* REPNE/REPNZ */
572			case 0xf3:  /* REP or REPE/REPZ */
573			case 0xf0:  /* LOCK */
574				// only accept the last prefix
575				insn->isPrefixf2 = false;
576				insn->isPrefixf3 = false;
577				insn->isPrefixf0 = false;
578				setPrefixPresent(insn, byte, prefixLocation);
579				insn->prefix0 = byte;
580				break;
581			case 0x2e:  /* CS segment override -OR- Branch not taken */
582				insn->segmentOverride = SEG_OVERRIDE_CS;
583				// only accept the last prefix
584				insn->isPrefix2e = false;
585				insn->isPrefix36 = false;
586				insn->isPrefix3e = false;
587				insn->isPrefix26 = false;
588				insn->isPrefix64 = false;
589				insn->isPrefix65 = false;
590
591				setPrefixPresent(insn, byte, prefixLocation);
592				insn->prefix1 = byte;
593				break;
594			case 0x36:  /* SS segment override -OR- Branch taken */
595				insn->segmentOverride = SEG_OVERRIDE_SS;
596				// only accept the last prefix
597				insn->isPrefix2e = false;
598				insn->isPrefix36 = false;
599				insn->isPrefix3e = false;
600				insn->isPrefix26 = false;
601				insn->isPrefix64 = false;
602				insn->isPrefix65 = false;
603
604				setPrefixPresent(insn, byte, prefixLocation);
605				insn->prefix1 = byte;
606				break;
607			case 0x3e:  /* DS segment override */
608				insn->segmentOverride = SEG_OVERRIDE_DS;
609				// only accept the last prefix
610				insn->isPrefix2e = false;
611				insn->isPrefix36 = false;
612				insn->isPrefix3e = false;
613				insn->isPrefix26 = false;
614				insn->isPrefix64 = false;
615				insn->isPrefix65 = false;
616
617				setPrefixPresent(insn, byte, prefixLocation);
618				insn->prefix1 = byte;
619				break;
620			case 0x26:  /* ES segment override */
621				insn->segmentOverride = SEG_OVERRIDE_ES;
622				// only accept the last prefix
623				insn->isPrefix2e = false;
624				insn->isPrefix36 = false;
625				insn->isPrefix3e = false;
626				insn->isPrefix26 = false;
627				insn->isPrefix64 = false;
628				insn->isPrefix65 = false;
629
630				setPrefixPresent(insn, byte, prefixLocation);
631				insn->prefix1 = byte;
632				break;
633			case 0x64:  /* FS segment override */
634				insn->segmentOverride = SEG_OVERRIDE_FS;
635				// only accept the last prefix
636				insn->isPrefix2e = false;
637				insn->isPrefix36 = false;
638				insn->isPrefix3e = false;
639				insn->isPrefix26 = false;
640				insn->isPrefix64 = false;
641				insn->isPrefix65 = false;
642
643				setPrefixPresent(insn, byte, prefixLocation);
644				insn->prefix1 = byte;
645				break;
646			case 0x65:  /* GS segment override */
647				insn->segmentOverride = SEG_OVERRIDE_GS;
648				// only accept the last prefix
649				insn->isPrefix2e = false;
650				insn->isPrefix36 = false;
651				insn->isPrefix3e = false;
652				insn->isPrefix26 = false;
653				insn->isPrefix64 = false;
654				insn->isPrefix65 = false;
655
656				setPrefixPresent(insn, byte, prefixLocation);
657				insn->prefix1 = byte;
658				break;
659			case 0x66:  /* Operand-size override */
660				hasOpSize = true;
661				setPrefixPresent(insn, byte, prefixLocation);
662				insn->prefix2 = byte;
663				break;
664			case 0x67:  /* Address-size override */
665				hasAdSize = true;
666				setPrefixPresent(insn, byte, prefixLocation);
667				insn->prefix3 = byte;
668				break;
669			default:    /* Not a prefix byte */
670				isPrefix = false;
671				break;
672		}
673
674		//if (isPrefix)
675		//	dbgprintf(insn, "Found prefix 0x%hhx", byte);
676	}
677
678	insn->vectorExtensionType = TYPE_NO_VEX_XOP;
679
680
681	if (byte == 0x62) {
682		uint8_t byte1, byte2;
683
684		if (consumeByte(insn, &byte1)) {
685			//dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
686			return -1;
687		}
688
689		if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
690				((~byte1 & 0xc) == 0xc)) {
691			if (lookAtByte(insn, &byte2)) {
692				//dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
693				return -1;
694			}
695
696			if ((byte2 & 0x4) == 0x4) {
697				insn->vectorExtensionType = TYPE_EVEX;
698			} else {
699				unconsumeByte(insn); /* unconsume byte1 */
700				unconsumeByte(insn); /* unconsume byte  */
701				insn->necessaryPrefixLocation = insn->readerCursor - 2;
702			}
703
704			if (insn->vectorExtensionType == TYPE_EVEX) {
705				insn->vectorExtensionPrefix[0] = byte;
706				insn->vectorExtensionPrefix[1] = byte1;
707
708				if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
709					//dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
710					return -1;
711				}
712
713				if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
714					//dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
715					return -1;
716				}
717
718				/* We simulate the REX prefix for simplicity's sake */
719				if (insn->mode == MODE_64BIT) {
720					insn->rexPrefix = 0x40
721						| (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
722						| (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
723						| (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
724						| (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
725				}
726
727				//dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
728				//		insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
729				//		insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
730			}
731		} else {
732			// BOUND instruction
733			unconsumeByte(insn); /* unconsume byte1 */
734			unconsumeByte(insn); /* unconsume byte */
735		}
736	} else if (byte == 0xc4) {
737		uint8_t byte1;
738
739		if (lookAtByte(insn, &byte1)) {
740			//dbgprintf(insn, "Couldn't read second byte of VEX");
741			return -1;
742		}
743
744		if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
745			insn->vectorExtensionType = TYPE_VEX_3B;
746			insn->necessaryPrefixLocation = insn->readerCursor - 1;
747		} else {
748			unconsumeByte(insn);
749			insn->necessaryPrefixLocation = insn->readerCursor - 1;
750		}
751
752		if (insn->vectorExtensionType == TYPE_VEX_3B) {
753			insn->vectorExtensionPrefix[0] = byte;
754			if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
755				return -1;
756			if (consumeByte(insn, &insn->vectorExtensionPrefix[2]))
757				return -1;
758
759			/* We simulate the REX prefix for simplicity's sake */
760			if (insn->mode == MODE_64BIT) {
761				insn->rexPrefix = 0x40
762					| (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
763					| (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
764					| (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
765					| (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
766
767			}
768		}
769	} else if (byte == 0xc5) {
770		uint8_t byte1;
771
772		if (lookAtByte(insn, &byte1)) {
773			//dbgprintf(insn, "Couldn't read second byte of VEX");
774			return -1;
775		}
776
777		if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
778			insn->vectorExtensionType = TYPE_VEX_2B;
779		} else {
780			unconsumeByte(insn);
781		}
782
783		if (insn->vectorExtensionType == TYPE_VEX_2B) {
784			insn->vectorExtensionPrefix[0] = byte;
785			if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
786				return -1;
787
788			if (insn->mode == MODE_64BIT) {
789				insn->rexPrefix = 0x40
790					| (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
791			}
792
793			switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
794				default:
795					break;
796				case VEX_PREFIX_66:
797					hasOpSize = true;
798					break;
799			}
800		}
801	} else if (byte == 0x8f) {
802		uint8_t byte1;
803
804		if (lookAtByte(insn, &byte1)) {
805			// dbgprintf(insn, "Couldn't read second byte of XOP");
806			return -1;
807		}
808
809		if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
810			insn->vectorExtensionType = TYPE_XOP;
811			insn->necessaryPrefixLocation = insn->readerCursor - 1;
812		} else {
813			unconsumeByte(insn);
814			insn->necessaryPrefixLocation = insn->readerCursor - 1;
815		}
816
817		if (insn->vectorExtensionType == TYPE_XOP) {
818			insn->vectorExtensionPrefix[0] = byte;
819			if (consumeByte(insn, &insn->vectorExtensionPrefix[1]))
820				return -1;
821			if (consumeByte(insn, &insn->vectorExtensionPrefix[2]))
822				return -1;
823
824			/* We simulate the REX prefix for simplicity's sake */
825			if (insn->mode == MODE_64BIT) {
826				insn->rexPrefix = 0x40
827					| (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
828					| (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
829					| (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
830					| (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
831			}
832
833			switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
834				default:
835					break;
836				case VEX_PREFIX_66:
837					hasOpSize = true;
838					break;
839			}
840		}
841	} else {
842		if (insn->mode == MODE_64BIT) {
843			if ((byte & 0xf0) == 0x40) {
844				uint8_t opcodeByte;
845
846				while(true) {
847					if (lookAtByte(insn, &opcodeByte))	// out of input code
848						return -1;
849					if ((opcodeByte & 0xf0) == 0x40) {
850						// another REX prefix, but we only remember the last one
851						if (consumeByte(insn, &byte))
852							return -1;
853					} else
854						break;
855				}
856
857				insn->rexPrefix = byte;
858				insn->necessaryPrefixLocation = insn->readerCursor - 2;
859				// dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
860			} else {
861				unconsumeByte(insn);
862				insn->necessaryPrefixLocation = insn->readerCursor - 1;
863			}
864		} else {
865			unconsumeByte(insn);
866			insn->necessaryPrefixLocation = insn->readerCursor - 1;
867		}
868	}
869
870	if (insn->mode == MODE_16BIT) {
871		insn->registerSize       = (hasOpSize ? 4 : 2);
872		insn->addressSize        = (hasAdSize ? 4 : 2);
873		insn->displacementSize   = (hasAdSize ? 4 : 2);
874		insn->immediateSize      = (hasOpSize ? 4 : 2);
875		insn->immSize = (hasOpSize ? 4 : 2);
876	} else if (insn->mode == MODE_32BIT) {
877		insn->registerSize       = (hasOpSize ? 2 : 4);
878		insn->addressSize        = (hasAdSize ? 2 : 4);
879		insn->displacementSize   = (hasAdSize ? 2 : 4);
880		insn->immediateSize      = (hasOpSize ? 2 : 4);
881		insn->immSize = (hasOpSize ? 2 : 4);
882	} else if (insn->mode == MODE_64BIT) {
883		if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
884			insn->registerSize       = 8;
885			insn->addressSize        = (hasAdSize ? 4 : 8);
886			insn->displacementSize   = 4;
887			insn->immediateSize      = 4;
888			insn->immSize      = 4;
889		} else if (insn->rexPrefix) {
890			insn->registerSize       = (hasOpSize ? 2 : 4);
891			insn->addressSize        = (hasAdSize ? 4 : 8);
892			insn->displacementSize   = (hasOpSize ? 2 : 4);
893			insn->immediateSize      = (hasOpSize ? 2 : 4);
894			insn->immSize      = (hasOpSize ? 2 : 4);
895		} else {
896			insn->registerSize       = (hasOpSize ? 2 : 4);
897			insn->addressSize        = (hasAdSize ? 4 : 8);
898			insn->displacementSize   = (hasOpSize ? 2 : 4);
899			insn->immediateSize      = (hasOpSize ? 2 : 4);
900			insn->immSize      = (hasOpSize ? 4 : 8);
901		}
902	}
903
904	return 0;
905}
906
907static int readModRM(struct InternalInstruction *insn);
908
909/*
910 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
911 *   extended or escape opcodes).
912 *
913 * @param insn  - The instruction whose opcode is to be read.
914 * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
915 */
916static int readOpcode(struct InternalInstruction *insn)
917{
918	/* Determine the length of the primary opcode */
919	uint8_t current;
920
921	// printf(">>> readOpcode() = %x\n", insn->readerCursor);
922
923	insn->opcodeType = ONEBYTE;
924	insn->firstByte = 0x00;
925
926	if (insn->vectorExtensionType == TYPE_EVEX) {
927		switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
928			default:
929				// dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
930				// 		mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
931				return -1;
932			case VEX_LOB_0F:
933				insn->opcodeType = TWOBYTE;
934				return consumeByte(insn, &insn->opcode);
935			case VEX_LOB_0F38:
936				insn->opcodeType = THREEBYTE_38;
937				return consumeByte(insn, &insn->opcode);
938			case VEX_LOB_0F3A:
939				insn->opcodeType = THREEBYTE_3A;
940				return consumeByte(insn, &insn->opcode);
941		}
942	} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
943		switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
944			default:
945				// dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
946				//		mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
947				return -1;
948			case VEX_LOB_0F:
949				insn->twoByteEscape = 0x0f;
950				insn->opcodeType = TWOBYTE;
951				return consumeByte(insn, &insn->opcode);
952			case VEX_LOB_0F38:
953				insn->twoByteEscape = 0x0f;
954				insn->threeByteEscape = 0x38;
955				insn->opcodeType = THREEBYTE_38;
956				return consumeByte(insn, &insn->opcode);
957			case VEX_LOB_0F3A:
958				insn->twoByteEscape = 0x0f;
959				insn->threeByteEscape = 0x3a;
960				insn->opcodeType = THREEBYTE_3A;
961				return consumeByte(insn, &insn->opcode);
962		}
963	} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
964		insn->twoByteEscape = 0x0f;
965		insn->opcodeType = TWOBYTE;
966		return consumeByte(insn, &insn->opcode);
967	} else if (insn->vectorExtensionType == TYPE_XOP) {
968		switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
969			default:
970				// dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
971				// 		mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
972				return -1;
973			case XOP_MAP_SELECT_8:
974				// FIXME: twoByteEscape?
975				insn->opcodeType = XOP8_MAP;
976				return consumeByte(insn, &insn->opcode);
977			case XOP_MAP_SELECT_9:
978				// FIXME: twoByteEscape?
979				insn->opcodeType = XOP9_MAP;
980				return consumeByte(insn, &insn->opcode);
981			case XOP_MAP_SELECT_A:
982				// FIXME: twoByteEscape?
983				insn->opcodeType = XOPA_MAP;
984				return consumeByte(insn, &insn->opcode);
985		}
986	}
987
988	if (consumeByte(insn, &current))
989		return -1;
990
991	// save this first byte for MOVcr, MOVdr, MOVrc, MOVrd
992	insn->firstByte = current;
993
994	if (current == 0x0f) {
995		// dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
996
997		insn->twoByteEscape = current;
998
999		if (consumeByte(insn, &current))
1000			return -1;
1001
1002		if (current == 0x38) {
1003			// dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
1004
1005			insn->threeByteEscape = current;
1006
1007			if (consumeByte(insn, &current))
1008				return -1;
1009
1010			insn->opcodeType = THREEBYTE_38;
1011		} else if (current == 0x3a) {
1012			// dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
1013
1014			insn->threeByteEscape = current;
1015
1016			if (consumeByte(insn, &current))
1017				return -1;
1018
1019			insn->opcodeType = THREEBYTE_3A;
1020		} else {
1021#ifndef CAPSTONE_X86_REDUCE
1022			switch(current) {
1023				default:
1024					// dbgprintf(insn, "Didn't find a three-byte escape prefix");
1025					insn->opcodeType = TWOBYTE;
1026					break;
1027				case 0x0e:	// HACK for femms. to be handled properly in next version 3.x
1028					insn->opcodeType = T3DNOW_MAP;
1029					// this encode does not have ModRM
1030					insn->consumedModRM = true;
1031					break;
1032				case 0x0f:
1033					// 3DNow instruction has weird format: ModRM/SIB/displacement + opcode
1034					if (readModRM(insn))
1035						return -1;
1036					// next is 3DNow opcode
1037					if (consumeByte(insn, &current))
1038						return -1;
1039					insn->opcodeType = T3DNOW_MAP;
1040					break;
1041			}
1042#endif
1043		}
1044	}
1045
1046	/*
1047	 * At this point we have consumed the full opcode.
1048	 * Anything we consume from here on must be unconsumed.
1049	 */
1050
1051	insn->opcode = current;
1052
1053	return 0;
1054}
1055
1056// Hacky for FEMMS
1057#define GET_INSTRINFO_ENUM
1058#ifndef CAPSTONE_X86_REDUCE
1059#include "X86GenInstrInfo.inc"
1060#else
1061#include "X86GenInstrInfo_reduce.inc"
1062#endif
1063
1064/*
1065 * getIDWithAttrMask - Determines the ID of an instruction, consuming
1066 *   the ModR/M byte as appropriate for extended and escape opcodes,
1067 *   and using a supplied attribute mask.
1068 *
1069 * @param instructionID - A pointer whose target is filled in with the ID of the
1070 *                        instruction.
1071 * @param insn          - The instruction whose ID is to be determined.
1072 * @param attrMask      - The attribute mask to search.
1073 * @return              - 0 if the ModR/M could be read when needed or was not
1074 *                        needed; nonzero otherwise.
1075 */
1076static int getIDWithAttrMask(uint16_t *instructionID,
1077		struct InternalInstruction *insn,
1078		uint16_t attrMask)
1079{
1080	bool hasModRMExtension;
1081
1082	InstructionContext instructionClass;
1083
1084#ifndef CAPSTONE_X86_REDUCE
1085	// HACK for femms. to be handled properly in next version 3.x
1086	if (insn->opcode == 0x0e && insn->opcodeType == T3DNOW_MAP) {
1087		*instructionID = X86_FEMMS;
1088		return 0;
1089	}
1090#endif
1091
1092	if (insn->opcodeType == T3DNOW_MAP)
1093		instructionClass = IC_OF;
1094	else
1095		instructionClass = contextForAttrs(attrMask);
1096
1097	hasModRMExtension = modRMRequired(insn->opcodeType,
1098			instructionClass,
1099			insn->opcode) != 0;
1100
1101	if (hasModRMExtension) {
1102		if (readModRM(insn))
1103			return -1;
1104
1105		*instructionID = decode(insn->opcodeType,
1106				instructionClass,
1107				insn->opcode,
1108				insn->modRM);
1109	} else {
1110		*instructionID = decode(insn->opcodeType,
1111				instructionClass,
1112				insn->opcode,
1113				0);
1114	}
1115
1116	return 0;
1117}
1118
1119/*
1120 * is16BitEquivalent - Determines whether two instruction names refer to
1121 * equivalent instructions but one is 16-bit whereas the other is not.
1122 *
1123 * @param orig  - The instruction ID that is not 16-bit
1124 * @param equiv - The instruction ID that is 16-bit
1125 */
1126static bool is16BitEquivalent(unsigned orig, unsigned equiv)
1127{
1128	size_t i;
1129	uint16_t idx;
1130
1131	if ((idx = x86_16_bit_eq_lookup[orig]) != 0) {
1132		for (i = idx - 1; i < ARR_SIZE(x86_16_bit_eq_tbl) && x86_16_bit_eq_tbl[i].first == orig; i++) {
1133			if (x86_16_bit_eq_tbl[i].second == equiv)
1134				return true;
1135		}
1136	}
1137
1138	return false;
1139}
1140
1141/*
1142 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
1143 *   appropriate for extended and escape opcodes.  Determines the attributes and
1144 *   context for the instruction before doing so.
1145 *
1146 * @param insn  - The instruction whose ID is to be determined.
1147 * @return      - 0 if the ModR/M could be read when needed or was not needed;
1148 *                nonzero otherwise.
1149 */
1150static int getID(struct InternalInstruction *insn)
1151{
1152	uint16_t attrMask;
1153	uint16_t instructionID;
1154	const struct InstructionSpecifier *spec;
1155
1156	// printf(">>> getID()\n");
1157	attrMask = ATTR_NONE;
1158
1159	if (insn->mode == MODE_64BIT)
1160		attrMask |= ATTR_64BIT;
1161
1162	if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1163		attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1164
1165		if (insn->vectorExtensionType == TYPE_EVEX) {
1166			switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1167				case VEX_PREFIX_66:
1168					attrMask |= ATTR_OPSIZE;
1169					break;
1170				case VEX_PREFIX_F3:
1171					attrMask |= ATTR_XS;
1172					break;
1173				case VEX_PREFIX_F2:
1174					attrMask |= ATTR_XD;
1175					break;
1176			}
1177
1178			if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1179				attrMask |= ATTR_EVEXKZ;
1180			if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1181				attrMask |= ATTR_EVEXB;
1182			if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1183				attrMask |= ATTR_EVEXK;
1184			if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1185				attrMask |= ATTR_EVEXL;
1186			if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1187				attrMask |= ATTR_EVEXL2;
1188		} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1189			switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1190				case VEX_PREFIX_66:
1191					attrMask |= ATTR_OPSIZE;
1192					break;
1193				case VEX_PREFIX_F3:
1194					attrMask |= ATTR_XS;
1195					break;
1196				case VEX_PREFIX_F2:
1197					attrMask |= ATTR_XD;
1198					break;
1199			}
1200
1201			if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1202				attrMask |= ATTR_VEXL;
1203		} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1204			switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1205				case VEX_PREFIX_66:
1206					attrMask |= ATTR_OPSIZE;
1207					break;
1208				case VEX_PREFIX_F3:
1209					attrMask |= ATTR_XS;
1210					break;
1211				case VEX_PREFIX_F2:
1212					attrMask |= ATTR_XD;
1213					break;
1214			}
1215
1216			if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1217				attrMask |= ATTR_VEXL;
1218		} else if (insn->vectorExtensionType == TYPE_XOP) {
1219			switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1220				case VEX_PREFIX_66:
1221					attrMask |= ATTR_OPSIZE;
1222					break;
1223				case VEX_PREFIX_F3:
1224					attrMask |= ATTR_XS;
1225					break;
1226				case VEX_PREFIX_F2:
1227					attrMask |= ATTR_XD;
1228					break;
1229			}
1230
1231			if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1232				attrMask |= ATTR_VEXL;
1233		} else {
1234			return -1;
1235		}
1236	} else {
1237		if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) {
1238			attrMask |= ATTR_OPSIZE;
1239		} else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) {
1240			attrMask |= ATTR_ADSIZE;
1241		} else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) {
1242			attrMask |= ATTR_XS;
1243		} else if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) {
1244			attrMask |= ATTR_XD;
1245		}
1246	}
1247
1248	if (insn->rexPrefix & 0x08)
1249		attrMask |= ATTR_REXW;
1250
1251	if (getIDWithAttrMask(&instructionID, insn, attrMask))
1252		return -1;
1253
1254	/* Fixing CALL and JMP instruction when in 64bit mode and x66 prefix is used */
1255	if (insn->mode == MODE_64BIT && insn->isPrefix66 &&
1256	   (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1257	{
1258		attrMask ^= ATTR_OPSIZE;
1259		if (getIDWithAttrMask(&instructionID, insn, attrMask))
1260			return -1;
1261	}
1262
1263
1264	/*
1265	 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1266	 * of the AdSize prefix is inverted w.r.t. 32-bit mode.
1267	 */
1268	if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) {
1269		spec = specifierForUID(instructionID);
1270
1271		/*
1272		 * Check for Ii8PCRel instructions. We could alternatively do a
1273		 * string-compare on the names, but this is probably cheaper.
1274		 */
1275		if (x86OperandSets[spec->operands][0].type == TYPE_REL8) {
1276			attrMask ^= ATTR_ADSIZE;
1277			if (getIDWithAttrMask(&instructionID, insn, attrMask))
1278				return -1;
1279		}
1280	}
1281
1282	/* The following clauses compensate for limitations of the tables. */
1283	if ((insn->mode == MODE_16BIT || insn->isPrefix66) &&
1284			!(attrMask & ATTR_OPSIZE)) {
1285		/*
1286		 * The instruction tables make no distinction between instructions that
1287		 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1288		 * particular spot (i.e., many MMX operations).  In general we're
1289		 * conservative, but in the specific case where OpSize is present but not
1290		 * in the right place we check if there's a 16-bit operation.
1291		 */
1292
1293		const struct InstructionSpecifier *spec;
1294		uint16_t instructionIDWithOpsize;
1295
1296		spec = specifierForUID(instructionID);
1297
1298		if (getIDWithAttrMask(&instructionIDWithOpsize,
1299					insn, attrMask | ATTR_OPSIZE)) {
1300			/*
1301			 * ModRM required with OpSize but not present; give up and return version
1302			 * without OpSize set
1303			 */
1304
1305			insn->instructionID = instructionID;
1306			insn->spec = spec;
1307			return 0;
1308		}
1309
1310		if (is16BitEquivalent(instructionID, instructionIDWithOpsize) &&
1311				(insn->mode == MODE_16BIT) ^ insn->isPrefix66) {
1312			insn->instructionID = instructionIDWithOpsize;
1313			insn->spec = specifierForUID(instructionIDWithOpsize);
1314		} else {
1315			insn->instructionID = instructionID;
1316			insn->spec = spec;
1317		}
1318		return 0;
1319	}
1320
1321	if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1322			insn->rexPrefix & 0x01) {
1323		/*
1324		 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1325		 * it should decode as XCHG %r8, %eax.
1326		 */
1327
1328		const struct InstructionSpecifier *spec;
1329		uint16_t instructionIDWithNewOpcode;
1330		const struct InstructionSpecifier *specWithNewOpcode;
1331
1332		spec = specifierForUID(instructionID);
1333
1334		/* Borrow opcode from one of the other XCHGar opcodes */
1335		insn->opcode = 0x91;
1336
1337		if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1338					insn,
1339					attrMask)) {
1340			insn->opcode = 0x90;
1341
1342			insn->instructionID = instructionID;
1343			insn->spec = spec;
1344			return 0;
1345		}
1346
1347		specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1348
1349		/* Change back */
1350		insn->opcode = 0x90;
1351
1352		insn->instructionID = instructionIDWithNewOpcode;
1353		insn->spec = specWithNewOpcode;
1354
1355		return 0;
1356	}
1357
1358	insn->instructionID = instructionID;
1359	insn->spec = specifierForUID(insn->instructionID);
1360
1361	return 0;
1362}
1363
1364/*
1365 * readSIB - Consumes the SIB byte to determine addressing information for an
1366 *   instruction.
1367 *
1368 * @param insn  - The instruction whose SIB byte is to be read.
1369 * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
1370 */
1371static int readSIB(struct InternalInstruction *insn)
1372{
1373	SIBIndex sibIndexBase = SIB_INDEX_NONE;
1374	SIBBase sibBaseBase = SIB_BASE_NONE;
1375	uint8_t index, base;
1376
1377	// dbgprintf(insn, "readSIB()");
1378
1379	if (insn->consumedSIB)
1380		return 0;
1381
1382	insn->consumedSIB = true;
1383
1384	switch (insn->addressSize) {
1385		case 2:
1386			// dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1387			return -1;
1388		case 4:
1389			sibIndexBase = SIB_INDEX_EAX;
1390			sibBaseBase = SIB_BASE_EAX;
1391			break;
1392		case 8:
1393			sibIndexBase = SIB_INDEX_RAX;
1394			sibBaseBase = SIB_BASE_RAX;
1395			break;
1396	}
1397
1398	if (consumeByte(insn, &insn->sib))
1399		return -1;
1400
1401	index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1402	if (insn->vectorExtensionType == TYPE_EVEX)
1403		index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
1404
1405	switch (index) {
1406		case 0x4:
1407			insn->sibIndex = SIB_INDEX_NONE;
1408			break;
1409		default:
1410			insn->sibIndex = (SIBIndex)(sibIndexBase + index);
1411			if (insn->sibIndex == SIB_INDEX_sib ||
1412					insn->sibIndex == SIB_INDEX_sib64)
1413				insn->sibIndex = SIB_INDEX_NONE;
1414			break;
1415	}
1416
1417	switch (scaleFromSIB(insn->sib)) {
1418		case 0:
1419			insn->sibScale = 1;
1420			break;
1421		case 1:
1422			insn->sibScale = 2;
1423			break;
1424		case 2:
1425			insn->sibScale = 4;
1426			break;
1427		case 3:
1428			insn->sibScale = 8;
1429			break;
1430	}
1431
1432	base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1433
1434	switch (base) {
1435		case 0x5:
1436		case 0xd:
1437			switch (modFromModRM(insn->modRM)) {
1438				case 0x0:
1439					insn->eaDisplacement = EA_DISP_32;
1440					insn->sibBase = SIB_BASE_NONE;
1441					break;
1442				case 0x1:
1443					insn->eaDisplacement = EA_DISP_8;
1444					insn->sibBase = (SIBBase)(sibBaseBase + base);
1445					break;
1446				case 0x2:
1447					insn->eaDisplacement = EA_DISP_32;
1448					insn->sibBase = (SIBBase)(sibBaseBase + base);
1449					break;
1450				case 0x3:
1451					//debug("Cannot have Mod = 0b11 and a SIB byte");
1452					return -1;
1453			}
1454			break;
1455		default:
1456			insn->sibBase = (SIBBase)(sibBaseBase + base);
1457			break;
1458	}
1459
1460	return 0;
1461}
1462
1463/*
1464 * readDisplacement - Consumes the displacement of an instruction.
1465 *
1466 * @param insn  - The instruction whose displacement is to be read.
1467 * @return      - 0 if the displacement byte was successfully read; nonzero
1468 *                otherwise.
1469 */
1470static int readDisplacement(struct InternalInstruction *insn)
1471{
1472	int8_t d8;
1473	int16_t d16;
1474	int32_t d32;
1475
1476	// dbgprintf(insn, "readDisplacement()");
1477
1478	if (insn->consumedDisplacement)
1479		return 0;
1480
1481	insn->consumedDisplacement = true;
1482	insn->displacementOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1483
1484	switch (insn->eaDisplacement) {
1485		case EA_DISP_NONE:
1486			insn->consumedDisplacement = false;
1487			break;
1488		case EA_DISP_8:
1489			if (consumeInt8(insn, &d8))
1490				return -1;
1491			insn->displacement = d8;
1492			break;
1493		case EA_DISP_16:
1494			if (consumeInt16(insn, &d16))
1495				return -1;
1496			insn->displacement = d16;
1497			break;
1498		case EA_DISP_32:
1499			if (consumeInt32(insn, &d32))
1500				return -1;
1501			insn->displacement = d32;
1502			break;
1503	}
1504
1505	insn->consumedDisplacement = true;
1506	return 0;
1507}
1508
1509/*
1510 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1511 *   displacement) for an instruction and interprets it.
1512 *
1513 * @param insn  - The instruction whose addressing information is to be read.
1514 * @return      - 0 if the information was successfully read; nonzero otherwise.
1515 */
1516static int readModRM(struct InternalInstruction *insn)
1517{
1518	uint8_t mod, rm, reg;
1519
1520	// dbgprintf(insn, "readModRM()");
1521
1522	// already got ModRM byte?
1523	if (insn->consumedModRM)
1524		return 0;
1525
1526	if (consumeByte(insn, &insn->modRM))
1527		return -1;
1528
1529	// mark that we already got ModRM
1530	insn->consumedModRM = true;
1531
1532	// save original ModRM for later reference
1533	insn->orgModRM = insn->modRM;
1534
1535	// handle MOVcr, MOVdr, MOVrc, MOVrd by pretending they have MRM.mod = 3
1536	if ((insn->firstByte == 0x0f && insn->opcodeType == TWOBYTE) &&
1537			(insn->opcode >= 0x20 && insn->opcode <= 0x23 ))
1538		insn->modRM |= 0xC0;
1539
1540	mod     = modFromModRM(insn->modRM);
1541	rm      = rmFromModRM(insn->modRM);
1542	reg     = regFromModRM(insn->modRM);
1543
1544	/*
1545	 * This goes by insn->registerSize to pick the correct register, which messes
1546	 * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
1547	 * fixupReg().
1548	 */
1549	switch (insn->registerSize) {
1550		case 2:
1551			insn->regBase = MODRM_REG_AX;
1552			insn->eaRegBase = EA_REG_AX;
1553			break;
1554		case 4:
1555			insn->regBase = MODRM_REG_EAX;
1556			insn->eaRegBase = EA_REG_EAX;
1557			break;
1558		case 8:
1559			insn->regBase = MODRM_REG_RAX;
1560			insn->eaRegBase = EA_REG_RAX;
1561			break;
1562	}
1563
1564	reg |= rFromREX(insn->rexPrefix) << 3;
1565	rm  |= bFromREX(insn->rexPrefix) << 3;
1566	if (insn->vectorExtensionType == TYPE_EVEX) {
1567		reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1568		rm  |=  xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1569	}
1570
1571	insn->reg = (Reg)(insn->regBase + reg);
1572
1573	switch (insn->addressSize) {
1574		case 2:
1575			insn->eaBaseBase = EA_BASE_BX_SI;
1576
1577			switch (mod) {
1578				case 0x0:
1579					if (rm == 0x6) {
1580						insn->eaBase = EA_BASE_NONE;
1581						insn->eaDisplacement = EA_DISP_16;
1582						if (readDisplacement(insn))
1583							return -1;
1584					} else {
1585						insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1586						insn->eaDisplacement = EA_DISP_NONE;
1587					}
1588					break;
1589				case 0x1:
1590					insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1591					insn->eaDisplacement = EA_DISP_8;
1592					insn->displacementSize = 1;
1593					if (readDisplacement(insn))
1594						return -1;
1595					break;
1596				case 0x2:
1597					insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1598					insn->eaDisplacement = EA_DISP_16;
1599					if (readDisplacement(insn))
1600						return -1;
1601					break;
1602				case 0x3:
1603					insn->eaBase = (EABase)(insn->eaRegBase + rm);
1604					insn->eaDisplacement = EA_DISP_NONE;
1605					if (readDisplacement(insn))
1606						return -1;
1607					break;
1608			}
1609			break;
1610		case 4:
1611		case 8:
1612			insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1613
1614			switch (mod) {
1615				case 0x0:
1616					insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1617					switch (rm) {
1618						case 0x14:
1619						case 0x4:
1620						case 0xc:   /* in case REXW.b is set */
1621							insn->eaBase = (insn->addressSize == 4 ?
1622									EA_BASE_sib : EA_BASE_sib64);
1623							if (readSIB(insn) || readDisplacement(insn))
1624								return -1;
1625							break;
1626						case 0x5:
1627						case 0xd:
1628							insn->eaBase = EA_BASE_NONE;
1629							insn->eaDisplacement = EA_DISP_32;
1630							if (readDisplacement(insn))
1631								return -1;
1632							break;
1633						default:
1634							insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1635							break;
1636					}
1637
1638					break;
1639				case 0x1:
1640					insn->displacementSize = 1;
1641					/* FALLTHROUGH */
1642				case 0x2:
1643					insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1644					switch (rm) {
1645						case 0x14:
1646						case 0x4:
1647						case 0xc:   /* in case REXW.b is set */
1648							insn->eaBase = EA_BASE_sib;
1649							if (readSIB(insn) || readDisplacement(insn))
1650								return -1;
1651							break;
1652						default:
1653							insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1654							if (readDisplacement(insn))
1655								return -1;
1656							break;
1657					}
1658					break;
1659				case 0x3:
1660					insn->eaDisplacement = EA_DISP_NONE;
1661					insn->eaBase = (EABase)(insn->eaRegBase + rm);
1662					break;
1663			}
1664			break;
1665	} /* switch (insn->addressSize) */
1666
1667	return 0;
1668}
1669
1670#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
1671	static uint8_t name(struct InternalInstruction *insn,   \
1672			OperandType type,                   \
1673			uint8_t index,                      \
1674			uint8_t *valid) {                   \
1675		*valid = 1;                                           \
1676		switch (type) {                                       \
1677			case TYPE_R8:       \
1678			    insn->operandSize = 1; \
1679				break; \
1680			case TYPE_R16:      \
1681			    insn->operandSize = 2; \
1682				break; \
1683			case TYPE_R32:      \
1684			    insn->operandSize = 4; \
1685				break; \
1686			case TYPE_R64:      \
1687			    insn->operandSize = 8; \
1688				break; \
1689			case TYPE_XMM512:   \
1690			    insn->operandSize = 64; \
1691				break; \
1692			case TYPE_XMM256:   \
1693			    insn->operandSize = 32; \
1694				break; \
1695			case TYPE_XMM128:   \
1696			    insn->operandSize = 16; \
1697				break; \
1698			case TYPE_XMM64:    \
1699			    insn->operandSize = 8; \
1700				break; \
1701			case TYPE_XMM32:    \
1702			    insn->operandSize = 4; \
1703				break; \
1704			case TYPE_XMM:      \
1705			    insn->operandSize = 2; \
1706				break; \
1707			case TYPE_MM64:     \
1708			    insn->operandSize = 8; \
1709				break; \
1710			case TYPE_MM32:     \
1711			    insn->operandSize = 4; \
1712				break; \
1713			case TYPE_MM:       \
1714			    insn->operandSize = 2; \
1715				break; \
1716			case TYPE_CONTROLREG: \
1717			    insn->operandSize = 4; \
1718				break; \
1719			default: break; \
1720		} \
1721		switch (type) {                                           \
1722			default:                                              \
1723				*valid = 0;                                       \
1724				return 0;                                         \
1725			case TYPE_Rv:                                         \
1726				return (uint8_t)(base + index);                   \
1727			case TYPE_R8:                                         \
1728				if (insn->rexPrefix &&                            \
1729					index >= 4 && index <= 7) { \
1730					return prefix##_SPL + (index - 4);        \
1731				} else {                                      \
1732					return prefix##_AL + index;               \
1733				}                                             \
1734			case TYPE_R16:                                        \
1735				return prefix##_AX + index;                       \
1736			case TYPE_R32:                                        \
1737				return prefix##_EAX + index;                      \
1738			case TYPE_R64:                                        \
1739				return prefix##_RAX + index;                      \
1740			case TYPE_XMM512:                                     \
1741				return prefix##_ZMM0 + index;                     \
1742			case TYPE_XMM256:                                     \
1743				return prefix##_YMM0 + index;                     \
1744			case TYPE_XMM128:                                     \
1745			case TYPE_XMM64:                                      \
1746			case TYPE_XMM32:                                      \
1747			case TYPE_XMM:                                        \
1748				return prefix##_XMM0 + index;                     \
1749			case TYPE_VK1:                                        \
1750			case TYPE_VK8:                                        \
1751			case TYPE_VK16:                                       \
1752				if (index > 7)                                    \
1753					*valid = 0;                                   \
1754				return prefix##_K0 + index;                       \
1755			case TYPE_MM64:                                       \
1756			case TYPE_MM32:                                       \
1757			case TYPE_MM:                                         \
1758				return prefix##_MM0 + (index & 7);                \
1759			case TYPE_SEGMENTREG:                                 \
1760				if (index > 5)                                    \
1761					*valid = 0;                                   \
1762				return prefix##_ES + index;                       \
1763			case TYPE_DEBUGREG:                                   \
1764				if (index > 7)                                    \
1765					*valid = 0;                                   \
1766				return prefix##_DR0 + index;                      \
1767			case TYPE_CONTROLREG:                                 \
1768				return prefix##_CR0 + index;                      \
1769		}                                                         \
1770	}
1771
1772/*
1773 * fixup*Value - Consults an operand type to determine the meaning of the
1774 *   reg or R/M field.  If the operand is an XMM operand, for example, an
1775 *   operand would be XMM0 instead of AX, which readModRM() would otherwise
1776 *   misinterpret it as.
1777 *
1778 * @param insn  - The instruction containing the operand.
1779 * @param type  - The operand type.
1780 * @param index - The existing value of the field as reported by readModRM().
1781 * @param valid - The address of a uint8_t.  The target is set to 1 if the
1782 *                field is valid for the register class; 0 if not.
1783 * @return      - The proper value.
1784 */
1785GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
1786GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
1787
1788/*
1789 * fixupReg - Consults an operand specifier to determine which of the
1790 *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
1791 *
1792 * @param insn  - See fixup*Value().
1793 * @param op    - The operand specifier.
1794 * @return      - 0 if fixup was successful; -1 if the register returned was
1795 *                invalid for its class.
1796 */
1797static int fixupReg(struct InternalInstruction *insn,
1798		const struct OperandSpecifier *op)
1799{
1800	uint8_t valid;
1801
1802	// dbgprintf(insn, "fixupReg()");
1803
1804	switch ((OperandEncoding)op->encoding) {
1805		default:
1806			//debug("Expected a REG or R/M encoding in fixupReg");
1807			return -1;
1808		case ENCODING_VVVV:
1809			insn->vvvv = (Reg)fixupRegValue(insn,
1810					(OperandType)op->type,
1811					insn->vvvv,
1812					&valid);
1813			if (!valid)
1814				return -1;
1815			break;
1816		case ENCODING_REG:
1817			insn->reg = (Reg)fixupRegValue(insn,
1818					(OperandType)op->type,
1819					(uint8_t)(insn->reg - insn->regBase),
1820					&valid);
1821			if (!valid)
1822				return -1;
1823			break;
1824		CASE_ENCODING_RM:
1825			if (insn->eaBase >= insn->eaRegBase) {
1826				insn->eaBase = (EABase)fixupRMValue(insn,
1827						(OperandType)op->type,
1828						(uint8_t)(insn->eaBase - insn->eaRegBase),
1829						&valid);
1830				if (!valid)
1831					return -1;
1832			}
1833			break;
1834	}
1835
1836	return 0;
1837}
1838
1839/*
1840 * readOpcodeRegister - Reads an operand from the opcode field of an
1841 *   instruction and interprets it appropriately given the operand width.
1842 *   Handles AddRegFrm instructions.
1843 *
1844 * @param insn  - the instruction whose opcode field is to be read.
1845 * @param size  - The width (in bytes) of the register being specified.
1846 *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1847 *                RAX.
1848 * @return      - 0 on success; nonzero otherwise.
1849 */
1850static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
1851{
1852	// dbgprintf(insn, "readOpcodeRegister()");
1853
1854	if (size == 0)
1855		size = insn->registerSize;
1856
1857	insn->operandSize = size;
1858
1859	switch (size) {
1860		case 1:
1861			insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1862						| (insn->opcode & 7)));
1863			if (insn->rexPrefix &&
1864					insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1865					insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1866				insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1867						+ (insn->opcodeRegister - MODRM_REG_AL - 4));
1868			}
1869
1870			break;
1871		case 2:
1872			insn->opcodeRegister = (Reg)(MODRM_REG_AX
1873					+ ((bFromREX(insn->rexPrefix) << 3)
1874						| (insn->opcode & 7)));
1875			break;
1876		case 4:
1877			insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1878					+ ((bFromREX(insn->rexPrefix) << 3)
1879						| (insn->opcode & 7)));
1880			break;
1881		case 8:
1882			insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1883					+ ((bFromREX(insn->rexPrefix) << 3)
1884						| (insn->opcode & 7)));
1885			break;
1886	}
1887
1888	return 0;
1889}
1890
1891/*
1892 * readImmediate - Consumes an immediate operand from an instruction, given the
1893 *   desired operand size.
1894 *
1895 * @param insn  - The instruction whose operand is to be read.
1896 * @param size  - The width (in bytes) of the operand.
1897 * @return      - 0 if the immediate was successfully consumed; nonzero
1898 *                otherwise.
1899 */
1900static int readImmediate(struct InternalInstruction *insn, uint8_t size)
1901{
1902	uint8_t imm8;
1903	uint16_t imm16;
1904	uint32_t imm32;
1905	uint64_t imm64;
1906
1907	// dbgprintf(insn, "readImmediate()");
1908
1909	if (insn->numImmediatesConsumed == 2) {
1910		//debug("Already consumed two immediates");
1911		return -1;
1912	}
1913
1914	if (size == 0)
1915		size = insn->immediateSize;
1916	else
1917		insn->immediateSize = size;
1918	insn->immediateOffset = (uint8_t)(insn->readerCursor - insn->startLocation);
1919
1920	switch (size) {
1921		case 1:
1922			if (consumeByte(insn, &imm8))
1923				return -1;
1924			insn->immediates[insn->numImmediatesConsumed] = imm8;
1925			break;
1926		case 2:
1927			if (consumeUInt16(insn, &imm16))
1928				return -1;
1929			insn->immediates[insn->numImmediatesConsumed] = imm16;
1930			break;
1931		case 4:
1932			if (consumeUInt32(insn, &imm32))
1933				return -1;
1934			insn->immediates[insn->numImmediatesConsumed] = imm32;
1935			break;
1936		case 8:
1937			if (consumeUInt64(insn, &imm64))
1938				return -1;
1939			insn->immediates[insn->numImmediatesConsumed] = imm64;
1940			break;
1941	}
1942
1943	insn->numImmediatesConsumed++;
1944
1945	return 0;
1946}
1947
1948/*
1949 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1950 *
1951 * @param insn  - The instruction whose operand is to be read.
1952 * @return      - 0 if the vvvv was successfully consumed; nonzero
1953 *                otherwise.
1954 */
1955static int readVVVV(struct InternalInstruction *insn)
1956{
1957	int vvvv;
1958	// dbgprintf(insn, "readVVVV()");
1959
1960	if (insn->vectorExtensionType == TYPE_EVEX)
1961		vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1962				vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1963	else if (insn->vectorExtensionType == TYPE_VEX_3B)
1964		vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1965	else if (insn->vectorExtensionType == TYPE_VEX_2B)
1966		vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1967	else if (insn->vectorExtensionType == TYPE_XOP)
1968		vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1969	else
1970		return -1;
1971
1972	if (insn->mode != MODE_64BIT)
1973		vvvv &= 0x7;
1974
1975	insn->vvvv = vvvv;
1976
1977	return 0;
1978}
1979
1980/*
1981 * readMaskRegister - Reads an mask register from the opcode field of an
1982 *   instruction.
1983 *
1984 * @param insn    - The instruction whose opcode field is to be read.
1985 * @return        - 0 on success; nonzero otherwise.
1986 */
1987static int readMaskRegister(struct InternalInstruction *insn)
1988{
1989	// dbgprintf(insn, "readMaskRegister()");
1990
1991	if (insn->vectorExtensionType != TYPE_EVEX)
1992		return -1;
1993
1994	insn->writemask = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1995
1996	return 0;
1997}
1998
1999/*
2000 * readOperands - Consults the specifier for an instruction and consumes all
2001 *   operands for that instruction, interpreting them as it goes.
2002 *
2003 * @param insn  - The instruction whose operands are to be read and interpreted.
2004 * @return      - 0 if all operands could be read; nonzero otherwise.
2005 */
2006static int readOperands(struct InternalInstruction *insn)
2007{
2008	int index;
2009	int hasVVVV, needVVVV;
2010	int sawRegImm = 0;
2011
2012	// printf(">>> readOperands()\n");
2013	/* If non-zero vvvv specified, need to make sure one of the operands
2014	   uses it. */
2015	hasVVVV = !readVVVV(insn);
2016	needVVVV = hasVVVV && (insn->vvvv != 0);
2017
2018	for (index = 0; index < X86_MAX_OPERANDS; ++index) {
2019		//printf(">>> encoding[%u] = %u\n", index, x86OperandSets[insn->spec->operands][index].encoding);
2020		switch (x86OperandSets[insn->spec->operands][index].encoding) {
2021			case ENCODING_NONE:
2022			case ENCODING_SI:
2023			case ENCODING_DI:
2024				break;
2025			case ENCODING_REG:
2026			CASE_ENCODING_RM:
2027				if (readModRM(insn))
2028					return -1;
2029				if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
2030					return -1;
2031				// Apply the AVX512 compressed displacement scaling factor.
2032				if (x86OperandSets[insn->spec->operands][index].encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
2033					insn->displacement *= 1 << (x86OperandSets[insn->spec->operands][index].encoding - ENCODING_RM);
2034				break;
2035			case ENCODING_CB:
2036			case ENCODING_CW:
2037			case ENCODING_CD:
2038			case ENCODING_CP:
2039			case ENCODING_CO:
2040			case ENCODING_CT:
2041				// dbgprintf(insn, "We currently don't hande code-offset encodings");
2042				return -1;
2043			case ENCODING_IB:
2044				if (sawRegImm) {
2045					/* Saw a register immediate so don't read again and instead split the
2046					   previous immediate.  FIXME: This is a hack. */
2047					insn->immediates[insn->numImmediatesConsumed] =
2048						insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
2049					++insn->numImmediatesConsumed;
2050					break;
2051				}
2052				if (readImmediate(insn, 1))
2053					return -1;
2054				if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
2055						x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
2056					sawRegImm = 1;
2057				break;
2058			case ENCODING_IW:
2059				if (readImmediate(insn, 2))
2060					return -1;
2061				break;
2062			case ENCODING_ID:
2063				if (readImmediate(insn, 4))
2064					return -1;
2065				break;
2066			case ENCODING_IO:
2067				if (readImmediate(insn, 8))
2068					return -1;
2069				break;
2070			case ENCODING_Iv:
2071				if (readImmediate(insn, insn->immediateSize))
2072					return -1;
2073				break;
2074			case ENCODING_Ia:
2075				if (readImmediate(insn, insn->addressSize))
2076					return -1;
2077				break;
2078			case ENCODING_RB:
2079				if (readOpcodeRegister(insn, 1))
2080					return -1;
2081				break;
2082			case ENCODING_RW:
2083				if (readOpcodeRegister(insn, 2))
2084					return -1;
2085				break;
2086			case ENCODING_RD:
2087				if (readOpcodeRegister(insn, 4))
2088					return -1;
2089				break;
2090			case ENCODING_RO:
2091				if (readOpcodeRegister(insn, 8))
2092					return -1;
2093				break;
2094			case ENCODING_Rv:
2095				if (readOpcodeRegister(insn, 0))
2096					return -1;
2097				break;
2098			case ENCODING_FP:
2099				break;
2100			case ENCODING_VVVV:
2101				needVVVV = 0; /* Mark that we have found a VVVV operand. */
2102				if (!hasVVVV)
2103					return -1;
2104				if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
2105					return -1;
2106				break;
2107			case ENCODING_WRITEMASK:
2108				if (readMaskRegister(insn))
2109					return -1;
2110				break;
2111			case ENCODING_DUP:
2112				break;
2113			default:
2114				// dbgprintf(insn, "Encountered an operand with an unknown encoding.");
2115				return -1;
2116		}
2117	}
2118
2119	/* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
2120	if (needVVVV) return -1;
2121
2122	return 0;
2123}
2124
2125// return True if instruction is illegal to use with prefixes
2126// This also check & fix the prefixPresent[] when a prefix is irrelevant.
2127static bool checkPrefix(struct InternalInstruction *insn)
2128{
2129	// LOCK prefix
2130	if (insn->isPrefixf0) {
2131		switch(insn->instructionID) {
2132			default:
2133				// invalid LOCK
2134				return true;
2135
2136			// nop dword [rax]
2137			case X86_NOOPL:
2138
2139			// DEC
2140			case X86_DEC16m:
2141			case X86_DEC32m:
2142			case X86_DEC64_16m:
2143			case X86_DEC64_32m:
2144			case X86_DEC64m:
2145			case X86_DEC8m:
2146
2147			// ADC
2148			case X86_ADC16mi:
2149			case X86_ADC16mi8:
2150			case X86_ADC16mr:
2151			case X86_ADC32mi:
2152			case X86_ADC32mi8:
2153			case X86_ADC32mr:
2154			case X86_ADC64mi32:
2155			case X86_ADC64mi8:
2156			case X86_ADC64mr:
2157			case X86_ADC8mi:
2158			case X86_ADC8mr:
2159
2160			// ADD
2161			case X86_ADD16mi:
2162			case X86_ADD16mi8:
2163			case X86_ADD16mr:
2164			case X86_ADD32mi:
2165			case X86_ADD32mi8:
2166			case X86_ADD32mr:
2167			case X86_ADD64mi32:
2168			case X86_ADD64mi8:
2169			case X86_ADD64mr:
2170			case X86_ADD8mi:
2171			case X86_ADD8mr:
2172
2173			// AND
2174			case X86_AND16mi:
2175			case X86_AND16mi8:
2176			case X86_AND16mr:
2177			case X86_AND32mi:
2178			case X86_AND32mi8:
2179			case X86_AND32mr:
2180			case X86_AND64mi32:
2181			case X86_AND64mi8:
2182			case X86_AND64mr:
2183			case X86_AND8mi:
2184			case X86_AND8mr:
2185
2186			// BTC
2187			case X86_BTC16mi8:
2188			case X86_BTC16mr:
2189			case X86_BTC32mi8:
2190			case X86_BTC32mr:
2191			case X86_BTC64mi8:
2192			case X86_BTC64mr:
2193
2194			// BTR
2195			case X86_BTR16mi8:
2196			case X86_BTR16mr:
2197			case X86_BTR32mi8:
2198			case X86_BTR32mr:
2199			case X86_BTR64mi8:
2200			case X86_BTR64mr:
2201
2202			// BTS
2203			case X86_BTS16mi8:
2204			case X86_BTS16mr:
2205			case X86_BTS32mi8:
2206			case X86_BTS32mr:
2207			case X86_BTS64mi8:
2208			case X86_BTS64mr:
2209
2210			// CMPXCHG
2211			case X86_CMPXCHG16B:
2212			case X86_CMPXCHG16rm:
2213			case X86_CMPXCHG32rm:
2214			case X86_CMPXCHG64rm:
2215			case X86_CMPXCHG8rm:
2216			case X86_CMPXCHG8B:
2217
2218			// INC
2219			case X86_INC16m:
2220			case X86_INC32m:
2221			case X86_INC64_16m:
2222			case X86_INC64_32m:
2223			case X86_INC64m:
2224			case X86_INC8m:
2225
2226			// NEG
2227			case X86_NEG16m:
2228			case X86_NEG32m:
2229			case X86_NEG64m:
2230			case X86_NEG8m:
2231
2232			// NOT
2233			case X86_NOT16m:
2234			case X86_NOT32m:
2235			case X86_NOT64m:
2236			case X86_NOT8m:
2237
2238			// OR
2239			case X86_OR16mi:
2240			case X86_OR16mi8:
2241			case X86_OR16mr:
2242			case X86_OR32mi:
2243			case X86_OR32mi8:
2244			case X86_OR32mr:
2245			case X86_OR32mrLocked:
2246			case X86_OR64mi32:
2247			case X86_OR64mi8:
2248			case X86_OR64mr:
2249			case X86_OR8mi:
2250			case X86_OR8mr:
2251
2252			// SBB
2253			case X86_SBB16mi:
2254			case X86_SBB16mi8:
2255			case X86_SBB16mr:
2256			case X86_SBB32mi:
2257			case X86_SBB32mi8:
2258			case X86_SBB32mr:
2259			case X86_SBB64mi32:
2260			case X86_SBB64mi8:
2261			case X86_SBB64mr:
2262			case X86_SBB8mi:
2263			case X86_SBB8mr:
2264
2265			// SUB
2266			case X86_SUB16mi:
2267			case X86_SUB16mi8:
2268			case X86_SUB16mr:
2269			case X86_SUB32mi:
2270			case X86_SUB32mi8:
2271			case X86_SUB32mr:
2272			case X86_SUB64mi32:
2273			case X86_SUB64mi8:
2274			case X86_SUB64mr:
2275			case X86_SUB8mi:
2276			case X86_SUB8mr:
2277
2278			// XADD
2279			case X86_XADD16rm:
2280			case X86_XADD32rm:
2281			case X86_XADD64rm:
2282			case X86_XADD8rm:
2283
2284			// XCHG
2285			case X86_XCHG16rm:
2286			case X86_XCHG32rm:
2287			case X86_XCHG64rm:
2288			case X86_XCHG8rm:
2289
2290			// XOR
2291			case X86_XOR16mi:
2292			case X86_XOR16mi8:
2293			case X86_XOR16mr:
2294			case X86_XOR32mi:
2295			case X86_XOR32mi8:
2296			case X86_XOR32mr:
2297			case X86_XOR64mi32:
2298			case X86_XOR64mi8:
2299			case X86_XOR64mr:
2300			case X86_XOR8mi:
2301			case X86_XOR8mr:
2302
2303				// this instruction can be used with LOCK prefix
2304				return false;
2305		}
2306	}
2307
2308	// REPNE prefix
2309	if (insn->isPrefixf2) {
2310		// 0xf2 can be a part of instruction encoding, but not really a prefix.
2311		// In such a case, clear it.
2312		if (insn->twoByteEscape == 0x0f) {
2313			insn->prefix0 = 0;
2314		}
2315	}
2316
2317	// no invalid prefixes
2318	return false;
2319}
2320
2321/*
2322 * decodeInstruction - Reads and interprets a full instruction provided by the
2323 *   user.
2324 *
2325 * @param insn      - A pointer to the instruction to be populated.  Must be
2326 *                    pre-allocated.
2327 * @param reader    - The function to be used to read the instruction's bytes.
2328 * @param readerArg - A generic argument to be passed to the reader to store
2329 *                    any internal state.
2330 * @param startLoc  - The address (in the reader's address space) of the first
2331 *                    byte in the instruction.
2332 * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
2333 *                    decode the instruction in.
2334 * @return          - 0 if instruction is valid; nonzero if not.
2335 */
2336int decodeInstruction(struct InternalInstruction *insn,
2337		byteReader_t reader,
2338		const void *readerArg,
2339		uint64_t startLoc,
2340		DisassemblerMode mode)
2341{
2342	insn->reader = reader;
2343	insn->readerArg = readerArg;
2344	insn->startLocation = startLoc;
2345	insn->readerCursor = startLoc;
2346	insn->mode = mode;
2347
2348	if (readPrefixes(insn)       ||
2349			readOpcode(insn)         ||
2350			getID(insn)      ||
2351			insn->instructionID == 0 ||
2352			checkPrefix(insn) ||
2353			readOperands(insn))
2354		return -1;
2355
2356	insn->length = (size_t)(insn->readerCursor - insn->startLocation);
2357
2358	// instruction length must be <= 15 to be valid
2359	if (insn->length > 15)
2360		return -1;
2361
2362	if (insn->operandSize == 0)
2363		insn->operandSize = insn->registerSize;
2364
2365	insn->operands = &x86OperandSets[insn->spec->operands][0];
2366
2367	// dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
2368	// 		startLoc, insn->readerCursor, insn->length);
2369
2370	//if (insn->length > 15)
2371	//	dbgprintf(insn, "Instruction exceeds 15-byte limit");
2372
2373#if 0
2374	printf("\n>>> x86OperandSets = %lu\n", sizeof(x86OperandSets));
2375	printf(">>> x86DisassemblerInstrSpecifiers = %lu\n", sizeof(x86DisassemblerInstrSpecifiers));
2376	printf(">>> x86DisassemblerContexts = %lu\n", sizeof(x86DisassemblerContexts));
2377	printf(">>> modRMTable = %lu\n", sizeof(modRMTable));
2378	printf(">>> x86DisassemblerOneByteOpcodes = %lu\n", sizeof(x86DisassemblerOneByteOpcodes));
2379	printf(">>> x86DisassemblerTwoByteOpcodes = %lu\n", sizeof(x86DisassemblerTwoByteOpcodes));
2380	printf(">>> x86DisassemblerThreeByte38Opcodes = %lu\n", sizeof(x86DisassemblerThreeByte38Opcodes));
2381	printf(">>> x86DisassemblerThreeByte3AOpcodes = %lu\n", sizeof(x86DisassemblerThreeByte3AOpcodes));
2382	printf(">>> x86DisassemblerThreeByteA6Opcodes = %lu\n", sizeof(x86DisassemblerThreeByteA6Opcodes));
2383	printf(">>> x86DisassemblerThreeByteA7Opcodes= %lu\n", sizeof(x86DisassemblerThreeByteA7Opcodes));
2384	printf(">>> x86DisassemblerXOP8Opcodes = %lu\n", sizeof(x86DisassemblerXOP8Opcodes));
2385	printf(">>> x86DisassemblerXOP9Opcodes = %lu\n", sizeof(x86DisassemblerXOP9Opcodes));
2386	printf(">>> x86DisassemblerXOPAOpcodes = %lu\n\n", sizeof(x86DisassemblerXOPAOpcodes));
2387#endif
2388
2389	return 0;
2390}
2391
2392#endif
2393
2394