1/* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 *     * Redistributions of source code must retain the above copyright notice,
10 *       this list of conditions and the following disclaimer.
11 *     * Redistributions in binary form must reproduce the above copyright notice,
12 *       this list of conditions and the following disclaimer in the documentation
13 *       and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26#include "udint.h"
27#include "types.h"
28#include "input.h"
29#include "decode.h"
30
31#ifndef __UD_STANDALONE__
32# include <string.h>
33#endif /* __UD_STANDALONE__ */
34
/* Upper bound on the number of prefix bytes in front of an instruction. */
#define MAX_PREFIXES    15

/*
 * REX prefix bit extractors: each yields 0 or 1 for the W (bit 3),
 * R (bit 2), X (bit 1) and B (bit 0) flag of the prefix byte `r`.
 */
#define REX_W(r)        ( ( ( r ) >> 3 ) & 0x1 )
#define REX_R(r)        ( ( ( r ) >> 2 ) & 0x1 )
#define REX_X(r)        ( ( ( r ) >> 1 ) & 0x1 )
#define REX_B(r)        ( ( r ) & 0x1 )

/*
 * Builds a W/R/X/B bit mask (same bit layout as the REX byte) out of
 * the P_REX* flags of an instruction-table prefix word `n`.
 */
#define REX_PFX_MASK(n) ( ( P_REXB(n) << 0 ) | \
                          ( P_REXX(n) << 1 ) | \
                          ( P_REXR(n) << 2 ) | \
                          ( P_REXW(n) << 3 ) )

/* scale-index-base (SIB) byte fields: scale = bits 7..6, index = 5..3, base = 2..0 */
#define SIB_S(b)        ( ( b ) >> 6 )
#define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b)        ( ( b ) & 7 )

/*
 * ModRM byte fields: mod = bits 7..6, reg = 5..3, rm = 2..0.
 * MODRM_NNN is an alias of MODRM_REG (same bits).
 */
#define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
#define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b)    MODRM_REG(b)
#define MODRM_RM(b)     ( ( b ) & 7 )
58
59static int decode_ext(struct ud *u, uint16_t ptr);
60
/*
 * Register classes understood by decode_reg(); the class selects which
 * register file a decoded register number is mapped into.
 */
enum reg_class {
  REGCLASS_NONE = 0,
  REGCLASS_GPR  = 1,  /* general purpose registers */
  REGCLASS_MMX  = 2,  /* mm registers              */
  REGCLASS_CR   = 3,  /* control registers         */
  REGCLASS_DB   = 4,  /* debug registers           */
  REGCLASS_SEG  = 5,  /* segment registers         */
  REGCLASS_XMM  = 6   /* xmm registers             */
};
70
71
/*
 * inp_uint8
 * inp_uint16
 * inp_uint32
 * inp_uint64
 *    Load little-endian values from input
 */
/* Reads one byte from the input of `u` and returns it. */
static uint8_t
inp_uint8(struct ud* u)
{
  const uint8_t byte = ud_inp_next(u);
  return byte;
}
84
/*
 * Reads two bytes from the input of `u` and combines them into a
 * 16-bit value, first byte least significant.
 */
static uint16_t
inp_uint16(struct ud* u)
{
  const uint16_t lo = ud_inp_next(u);
  const uint16_t hi = ud_inp_next(u);

  return lo | (hi << 8);
}
94
/*
 * Reads four bytes from the input of `u` and combines them into a
 * 32-bit value, first byte least significant.
 */
static uint32_t
inp_uint32(struct ud* u)
{
  uint32_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 32; shift += 8) {
    const uint32_t byte = ud_inp_next(u);
    value |= byte << shift;
  }
  return value;
}
108
/*
 * Reads eight bytes from the input of `u` and combines them into a
 * 64-bit value, first byte least significant.
 */
static uint64_t
inp_uint64(struct ud* u)
{
  uint64_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 64; shift += 8) {
    const uint64_t byte = ud_inp_next(u);
    value |= byte << shift;
  }
  return value;
}
130
131
/*
 * Effective operand size (16, 32 or 64) for a disassembly mode.
 *
 *   dis_mode  disassembly mode; expected to be 16, 32 or 64
 *   rex_w     non-zero selects 64 (only consulted in 64-bit mode)
 *   pfx_opr   non-zero when the operand-size prefix is present; it
 *             switches to the other of the 16/32 sizes
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
  case 64:
    if (rex_w) {
      return 64;
    }
    return pfx_opr ? 16 : 32;
  case 32:
    return pfx_opr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}
144
145
/*
 * Effective address size (16, 32 or 64) for a disassembly mode.
 *
 *   dis_mode  disassembly mode; expected to be 16, 32 or 64
 *   pfx_adr   non-zero when the address-size prefix is present
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
  case 64:
    return pfx_adr ? 32 : 64;
  case 32:
    return pfx_adr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}
158
159
160/* Looks up mnemonic code in the mnemonic string table
161 * Returns NULL if the mnemonic code is invalid
162 */
163const char*
164ud_lookup_mnemonic(enum ud_mnemonic_code c)
165{
166  if (c < UD_MAX_MNEMONIC_CODE) {
167    return ud_mnemonics_str[c];
168  } else {
169    return NULL;
170  }
171}
172
173
174/*
175 * decode_prefixes
176 *
177 *  Extracts instruction prefixes.
178 */
179static int
180decode_prefixes(struct ud *u)
181{
182  int done = 0;
183  uint8_t curr;
184  UD_RETURN_ON_ERROR(u);
185
186  do {
187    ud_inp_next(u);
188    UD_RETURN_ON_ERROR(u);
189    if (inp_len(u) == MAX_INSN_LENGTH) {
190      UD_RETURN_WITH_ERROR(u, "max instruction length");
191    }
192    curr = inp_curr(u);
193
194    switch (curr)
195    {
196    case 0x2E :
197      u->pfx_seg = UD_R_CS;
198      break;
199    case 0x36 :
200      u->pfx_seg = UD_R_SS;
201      break;
202    case 0x3E :
203      u->pfx_seg = UD_R_DS;
204      break;
205    case 0x26 :
206      u->pfx_seg = UD_R_ES;
207      break;
208    case 0x64 :
209      u->pfx_seg = UD_R_FS;
210      break;
211    case 0x65 :
212      u->pfx_seg = UD_R_GS;
213      break;
214    case 0x67 : /* adress-size override prefix */
215      u->pfx_adr = 0x67;
216      break;
217    case 0xF0 :
218      u->pfx_lock = 0xF0;
219      break;
220    case 0x66:
221      u->pfx_opr = 0x66;
222      break;
223    case 0xF2:
224      u->pfx_str = 0xf2;
225      break;
226    case 0xF3:
227      u->pfx_str = 0xf3;
228      break;
229    default:
230      done = 1;
231      break;
232    }
233  } while (!done);
234
235  if (u->dis_mode == 64 && (curr & 0xF0) == 0x40) {
236    /* rex prefixes in 64bit mode, must be the last prefix
237     */
238    u->pfx_rex = curr;
239  } else {
240    /* rewind back one byte in stream, since the above loop
241     * stops with a non-prefix byte.
242     */
243    inp_back(u);
244  }
245  return 0;
246}
247
248
249static inline unsigned int modrm( struct ud * u )
250{
251    if ( !u->have_modrm ) {
252        u->modrm = ud_inp_next( u );
253        u->have_modrm = 1;
254    }
255    return u->modrm;
256}
257
258
259static unsigned int
260resolve_operand_size( const struct ud * u, unsigned int s )
261{
262    switch ( s )
263    {
264    case SZ_V:
265        return ( u->opr_mode );
266    case SZ_Z:
267        return ( u->opr_mode == 16 ) ? 16 : 32;
268    case SZ_Y:
269        return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
270    case SZ_RDQ:
271        return ( u->dis_mode == 64 ) ? 64 : 32;
272    default:
273        return s;
274    }
275}
276
277
278static int resolve_mnemonic( struct ud* u )
279{
280  /* resolve 3dnow weirdness. */
281  if ( u->mnemonic == UD_I3dnow ) {
282    u->mnemonic = ud_itab[ u->le->table[ inp_curr( u )  ] ].mnemonic;
283  }
284  /* SWAPGS is only valid in 64bits mode */
285  if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
286    UDERR(u, "swapgs invalid in 64bits mode");
287    return -1;
288  }
289
290  if (u->mnemonic == UD_Ixchg) {
291    if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
292         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
293        (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
294         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
295      u->operand[0].type = UD_NONE;
296      u->operand[1].type = UD_NONE;
297      u->mnemonic = UD_Inop;
298    }
299  }
300
301  if (u->mnemonic == UD_Inop && u->pfx_repe) {
302    u->pfx_repe = 0;
303    u->mnemonic = UD_Ipause;
304  }
305  return 0;
306}
307
308
309/* -----------------------------------------------------------------------------
310 * decode_a()- Decodes operands of the type seg:offset
311 * -----------------------------------------------------------------------------
312 */
313static void
314decode_a(struct ud* u, struct ud_operand *op)
315{
316  if (u->opr_mode == 16) {
317    /* seg16:off16 */
318    op->type = UD_OP_PTR;
319    op->size = 32;
320    op->lval.ptr.off = inp_uint16(u);
321    op->lval.ptr.seg = inp_uint16(u);
322  } else {
323    /* seg16:off32 */
324    op->type = UD_OP_PTR;
325    op->size = 48;
326    op->lval.ptr.off = inp_uint32(u);
327    op->lval.ptr.seg = inp_uint16(u);
328  }
329}
330
331/* -----------------------------------------------------------------------------
332 * decode_gpr() - Returns decoded General Purpose Register
333 * -----------------------------------------------------------------------------
334 */
335static enum ud_type
336decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
337{
338  switch (s) {
339    case 64:
340        return UD_R_RAX + rm;
341    case 32:
342        return UD_R_EAX + rm;
343    case 16:
344        return UD_R_AX  + rm;
345    case  8:
346        if (u->dis_mode == 64 && u->pfx_rex) {
347            if (rm >= 4)
348                return UD_R_SPL + (rm-4);
349            return UD_R_AL + rm;
350        } else return UD_R_AL + rm;
351    default:
352        UD_ASSERT(!"invalid operand size");
353        return 0;
354  }
355}
356
357static void
358decode_reg(struct ud *u,
359           struct ud_operand *opr,
360           int type,
361           int num,
362           int size)
363{
364  int reg;
365  size = resolve_operand_size(u, size);
366  switch (type) {
367    case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
368    case REGCLASS_MMX : reg = UD_R_MM0  + (num & 7); break;
369    case REGCLASS_XMM : reg = UD_R_XMM0 + num; break;
370    case REGCLASS_CR : reg = UD_R_CR0  + num; break;
371    case REGCLASS_DB : reg = UD_R_DR0  + num; break;
372    case REGCLASS_SEG : {
373      /*
374       * Only 6 segment registers, anything else is an error.
375       */
376      if ((num & 7) > 5) {
377        UDERR(u, "invalid segment register value");
378        return;
379      } else {
380        reg = UD_R_ES + (num & 7);
381      }
382      break;
383    }
384    default:
385      UD_ASSERT(!"invalid register type");
386      break;
387  }
388  opr->type = UD_OP_REG;
389  opr->base = reg;
390  opr->size = size;
391}
392
393
394/*
395 * decode_imm
396 *
397 *    Decode Immediate values.
398 */
399static void
400decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
401{
402  op->size = resolve_operand_size(u, size);
403  op->type = UD_OP_IMM;
404
405  switch (op->size) {
406  case  8: op->lval.sbyte = inp_uint8(u);   break;
407  case 16: op->lval.uword = inp_uint16(u);  break;
408  case 32: op->lval.udword = inp_uint32(u); break;
409  case 64: op->lval.uqword = inp_uint64(u); break;
410  default: return;
411  }
412}
413
414
415/*
416 * decode_mem_disp
417 *
418 *    Decode mem address displacement.
419 */
420static void
421decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
422{
423  switch (size) {
424  case 8:
425    op->offset = 8;
426    op->lval.ubyte  = inp_uint8(u);
427    break;
428  case 16:
429    op->offset = 16;
430    op->lval.uword  = inp_uint16(u);
431    break;
432  case 32:
433    op->offset = 32;
434    op->lval.udword = inp_uint32(u);
435    break;
436  case 64:
437    op->offset = 64;
438    op->lval.uqword = inp_uint64(u);
439    break;
440  default:
441      return;
442  }
443}
444
445
446/*
447 * decode_modrm_reg
448 *
449 *    Decodes reg field of mod/rm byte
450 *
451 */
452static inline void
453decode_modrm_reg(struct ud         *u,
454                 struct ud_operand *operand,
455                 unsigned int       type,
456                 unsigned int       size)
457{
458  uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
459  decode_reg(u, operand, type, reg, size);
460}
461
462
463/*
464 * decode_modrm_rm
465 *
466 *    Decodes rm field of mod/rm byte
467 *
468 */
469static void
470decode_modrm_rm(struct ud         *u,
471                struct ud_operand *op,
472                unsigned char      type,    /* register type */
473                unsigned int       size)    /* operand size */
474
475{
476  size_t offset = 0;
477  unsigned char mod, rm;
478
479  /* get mod, r/m and reg fields */
480  mod = MODRM_MOD(modrm(u));
481  rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
482
483  /*
484   * If mod is 11b, then the modrm.rm specifies a register.
485   *
486   */
487  if (mod == 3) {
488    decode_reg(u, op, type, rm, size);
489    return;
490  }
491
492  /*
493   * !11b => Memory Address
494   */
495  op->type = UD_OP_MEM;
496  op->size = resolve_operand_size(u, size);
497
498  if (u->adr_mode == 64) {
499    op->base = UD_R_RAX + rm;
500    if (mod == 1) {
501      offset = 8;
502    } else if (mod == 2) {
503      offset = 32;
504    } else if (mod == 0 && (rm & 7) == 5) {
505      op->base = UD_R_RIP;
506      offset = 32;
507    } else {
508      offset = 0;
509    }
510    /*
511     * Scale-Index-Base (SIB)
512     */
513    if ((rm & 7) == 4) {
514      ud_inp_next(u);
515
516      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
517      op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
518      op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
519
520      /* special conditions for base reference */
521      if (op->index == UD_R_RSP) {
522        op->index = UD_NONE;
523        op->scale = UD_NONE;
524      }
525
526      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
527        if (mod == 0) {
528          op->base = UD_NONE;
529        }
530        if (mod == 1) {
531          offset = 8;
532        } else {
533          offset = 32;
534        }
535      }
536    }
537  } else if (u->adr_mode == 32) {
538    op->base = UD_R_EAX + rm;
539    if (mod == 1) {
540      offset = 8;
541    } else if (mod == 2) {
542      offset = 32;
543    } else if (mod == 0 && rm == 5) {
544      op->base = UD_NONE;
545      offset = 32;
546    } else {
547      offset = 0;
548    }
549
550    /* Scale-Index-Base (SIB) */
551    if ((rm & 7) == 4) {
552      ud_inp_next(u);
553
554      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
555      op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
556      op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
557
558      if (op->index == UD_R_ESP) {
559        op->index = UD_NONE;
560        op->scale = UD_NONE;
561      }
562
563      /* special condition for base reference */
564      if (op->base == UD_R_EBP) {
565        if (mod == 0) {
566          op->base = UD_NONE;
567        }
568        if (mod == 1) {
569          offset = 8;
570        } else {
571          offset = 32;
572        }
573      }
574    }
575  } else {
576    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
577                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
578    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
579                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
580    op->base  = bases[rm & 7];
581    op->index = indices[rm & 7];
582    if (mod == 0 && rm == 6) {
583      offset = 16;
584      op->base = UD_NONE;
585    } else if (mod == 1) {
586      offset = 8;
587    } else if (mod == 2) {
588      offset = 16;
589    }
590  }
591
592  if (offset) {
593    decode_mem_disp(u, offset, op);
594  }
595}
596
597
598/*
599 * decode_moffset
600 *    Decode offset-only memory operand
601 */
602static void
603decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
604{
605  opr->type = UD_OP_MEM;
606  opr->size = resolve_operand_size(u, size);
607  decode_mem_disp(u, u->adr_mode, opr);
608}
609
610
611/* -----------------------------------------------------------------------------
612 * decode_operands() - Disassembles Operands.
613 * -----------------------------------------------------------------------------
614 */
615static int
616decode_operand(struct ud           *u,
617               struct ud_operand   *operand,
618               enum ud_operand_code type,
619               unsigned int         size)
620{
621  operand->_oprcode = type;
622
623  switch (type) {
624    case OP_A :
625      decode_a(u, operand);
626      break;
627    case OP_MR:
628      decode_modrm_rm(u, operand, REGCLASS_GPR,
629                      MODRM_MOD(modrm(u)) == 3 ?
630                        Mx_reg_size(size) : Mx_mem_size(size));
631      break;
632    case OP_F:
633      u->br_far  = 1;
634      /* intended fall through */
635    case OP_M:
636      if (MODRM_MOD(modrm(u)) == 3) {
637        UDERR(u, "expected modrm.mod != 3");
638      }
639      /* intended fall through */
640    case OP_E:
641      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
642      break;
643    case OP_G:
644      decode_modrm_reg(u, operand, REGCLASS_GPR, size);
645      break;
646    case OP_sI:
647    case OP_I:
648      decode_imm(u, size, operand);
649      break;
650    case OP_I1:
651      operand->type = UD_OP_CONST;
652      operand->lval.udword = 1;
653      break;
654    case OP_N:
655      if (MODRM_MOD(modrm(u)) != 3) {
656        UDERR(u, "expected modrm.mod == 3");
657      }
658      /* intended fall through */
659    case OP_Q:
660      decode_modrm_rm(u, operand, REGCLASS_MMX, size);
661      break;
662    case OP_P:
663      decode_modrm_reg(u, operand, REGCLASS_MMX, size);
664      break;
665    case OP_U:
666      if (MODRM_MOD(modrm(u)) != 3) {
667        UDERR(u, "expected modrm.mod == 3");
668      }
669      /* intended fall through */
670    case OP_W:
671      decode_modrm_rm(u, operand, REGCLASS_XMM, size);
672      break;
673    case OP_V:
674      decode_modrm_reg(u, operand, REGCLASS_XMM, size);
675      break;
676    case OP_MU:
677      decode_modrm_rm(u, operand, REGCLASS_XMM,
678                      MODRM_MOD(modrm(u)) == 3 ?
679                        Mx_reg_size(size) : Mx_mem_size(size));
680      break;
681    case OP_S:
682      decode_modrm_reg(u, operand, REGCLASS_SEG, size);
683      break;
684    case OP_O:
685      decode_moffset(u, size, operand);
686      break;
687    case OP_R0:
688    case OP_R1:
689    case OP_R2:
690    case OP_R3:
691    case OP_R4:
692    case OP_R5:
693    case OP_R6:
694    case OP_R7:
695      decode_reg(u, operand, REGCLASS_GPR,
696                 (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size);
697      break;
698    case OP_AL:
699    case OP_AX:
700    case OP_eAX:
701    case OP_rAX:
702      decode_reg(u, operand, REGCLASS_GPR, 0, size);
703      break;
704    case OP_CL:
705    case OP_CX:
706    case OP_eCX:
707      decode_reg(u, operand, REGCLASS_GPR, 1, size);
708      break;
709    case OP_DL:
710    case OP_DX:
711    case OP_eDX:
712      decode_reg(u, operand, REGCLASS_GPR, 2, size);
713      break;
714    case OP_ES:
715    case OP_CS:
716    case OP_DS:
717    case OP_SS:
718    case OP_FS:
719    case OP_GS:
720      /* in 64bits mode, only fs and gs are allowed */
721      if (u->dis_mode == 64) {
722        if (type != OP_FS && type != OP_GS) {
723          UDERR(u, "invalid segment register in 64bits");
724        }
725      }
726      operand->type = UD_OP_REG;
727      operand->base = (type - OP_ES) + UD_R_ES;
728      operand->size = 16;
729      break;
730    case OP_J :
731      decode_imm(u, size, operand);
732      operand->type = UD_OP_JIMM;
733      break ;
734    case OP_R :
735      if (MODRM_MOD(modrm(u)) != 3) {
736        UDERR(u, "expected modrm.mod == 3");
737      }
738      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
739      break;
740    case OP_C:
741      decode_modrm_reg(u, operand, REGCLASS_CR, size);
742      break;
743    case OP_D:
744      decode_modrm_reg(u, operand, REGCLASS_DB, size);
745      break;
746    case OP_I3 :
747      operand->type = UD_OP_CONST;
748      operand->lval.sbyte = 3;
749      break;
750    case OP_ST0:
751    case OP_ST1:
752    case OP_ST2:
753    case OP_ST3:
754    case OP_ST4:
755    case OP_ST5:
756    case OP_ST6:
757    case OP_ST7:
758      operand->type = UD_OP_REG;
759      operand->base = (type - OP_ST0) + UD_R_ST0;
760      operand->size = 80;
761      break;
762    default :
763      break;
764  }
765  return 0;
766}
767
768
769/*
770 * decode_operands
771 *
772 *    Disassemble upto 3 operands of the current instruction being
773 *    disassembled. By the end of the function, the operand fields
774 *    of the ud structure will have been filled.
775 */
776static int
777decode_operands(struct ud* u)
778{
779  decode_operand(u, &u->operand[0],
780                    u->itab_entry->operand1.type,
781                    u->itab_entry->operand1.size);
782  decode_operand(u, &u->operand[1],
783                    u->itab_entry->operand2.type,
784                    u->itab_entry->operand2.size);
785  decode_operand(u, &u->operand[2],
786                    u->itab_entry->operand3.type,
787                    u->itab_entry->operand3.size);
788  return 0;
789}
790
791/* -----------------------------------------------------------------------------
792 * clear_insn() - clear instruction structure
793 * -----------------------------------------------------------------------------
794 */
795static void
796clear_insn(register struct ud* u)
797{
798  u->error     = 0;
799  u->pfx_seg   = 0;
800  u->pfx_opr   = 0;
801  u->pfx_adr   = 0;
802  u->pfx_lock  = 0;
803  u->pfx_repne = 0;
804  u->pfx_rep   = 0;
805  u->pfx_repe  = 0;
806  u->pfx_rex   = 0;
807  u->pfx_str   = 0;
808  u->mnemonic  = UD_Inone;
809  u->itab_entry = NULL;
810  u->have_modrm = 0;
811  u->br_far    = 0;
812
813  memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
814  memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
815  memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
816}
817
818
819static inline int
820resolve_pfx_str(struct ud* u)
821{
822  if (u->pfx_str == 0xf3) {
823    if (P_STR(u->itab_entry->prefix)) {
824        u->pfx_rep  = 0xf3;
825    } else {
826        u->pfx_repe = 0xf3;
827    }
828  } else if (u->pfx_str == 0xf2) {
829    u->pfx_repne = 0xf3;
830  }
831  return 0;
832}
833
834
835static int
836resolve_mode( struct ud* u )
837{
838  /* if in error state, bail out */
839  if ( u->error ) return -1;
840
841  /* propagate prefix effects */
842  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
843
844    /* Check validity of  instruction m64 */
845    if ( P_INV64( u->itab_entry->prefix ) ) {
846      UDERR(u, "instruction invalid in 64bits");
847      return -1;
848    }
849
850    /* effective rex prefix is the  effective mask for the
851     * instruction hard-coded in the opcode map.
852     */
853    u->pfx_rex = ( u->pfx_rex & 0x40 ) |
854                 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
855
856    /* whether this instruction has a default operand size of
857     * 64bit, also hardcoded into the opcode map.
858     */
859    u->default64 = P_DEF64( u->itab_entry->prefix );
860    /* calculate effective operand size */
861    if ( REX_W( u->pfx_rex ) ) {
862        u->opr_mode = 64;
863    } else if ( u->pfx_opr ) {
864        u->opr_mode = 16;
865    } else {
866        /* unless the default opr size of instruction is 64,
867         * the effective operand size in the absence of rex.w
868         * prefix is 32.
869         */
870        u->opr_mode = ( u->default64 ) ? 64 : 32;
871    }
872
873    /* calculate effective address size */
874    u->adr_mode = (u->pfx_adr) ? 32 : 64;
875  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
876    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
877    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
878  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
879    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
880    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
881  }
882
883  /* set flags for implicit addressing */
884  u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
885
886  return 0;
887}
888
889
890static inline int
891decode_insn(struct ud *u, uint16_t ptr)
892{
893  UD_ASSERT((ptr & 0x8000) == 0);
894  u->itab_entry = &ud_itab[ ptr ];
895  u->mnemonic = u->itab_entry->mnemonic;
896  return (resolve_pfx_str(u)  == 0 &&
897          resolve_mode(u)     == 0 &&
898          decode_operands(u)  == 0 &&
899          resolve_mnemonic(u) == 0) ? 0 : -1;
900}
901
902
903/*
904 * decode_3dnow()
905 *
906 *    Decoding 3dnow is a little tricky because of its strange opcode
907 *    structure. The final opcode disambiguation depends on the last
908 *    byte that comes after the operands have been decoded. Fortunately,
909 *    all 3dnow instructions have the same set of operand types. So we
910 *    go ahead and decode the instruction by picking an arbitrarily chosen
911 *    valid entry in the table, decode the operands, and read the final
912 *    byte to resolve the menmonic.
913 */
914static inline int
915decode_3dnow(struct ud* u)
916{
917  uint16_t ptr;
918  UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
919  UD_ASSERT(u->le->table[0xc] != 0);
920  decode_insn(u, u->le->table[0xc]);
921  ud_inp_next(u);
922  if (u->error) {
923    return -1;
924  }
925  ptr = u->le->table[inp_curr(u)];
926  UD_ASSERT((ptr & 0x8000) == 0);
927  u->mnemonic = ud_itab[ptr].mnemonic;
928  return 0;
929}
930
931
932static int
933decode_ssepfx(struct ud *u)
934{
935  uint8_t idx;
936  uint8_t pfx;
937
938  /*
939   * String prefixes (f2, f3) take precedence over operand
940   * size prefix (66).
941   */
942  pfx = u->pfx_str;
943  if (pfx == 0) {
944    pfx = u->pfx_opr;
945  }
946  idx = ((pfx & 0xf) + 1) / 2;
947  if (u->le->table[idx] == 0) {
948    idx = 0;
949  }
950  if (idx && u->le->table[idx] != 0) {
951    /*
952     * "Consume" the prefix as a part of the opcode, so it is no
953     * longer exported as an instruction prefix.
954     */
955    u->pfx_str = 0;
956    if (pfx == 0x66) {
957        /*
958         * consume "66" only if it was used for decoding, leaving
959         * it to be used as an operands size override for some
960         * simd instructions.
961         */
962        u->pfx_opr = 0;
963    }
964  }
965  return decode_ext(u, u->le->table[idx]);
966}
967
968
969/*
970 * decode_ext()
971 *
972 *    Decode opcode extensions (if any)
973 */
974static int
975decode_ext(struct ud *u, uint16_t ptr)
976{
977  uint8_t idx = 0;
978  if ((ptr & 0x8000) == 0) {
979    return decode_insn(u, ptr);
980  }
981  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
982  if (u->le->type == UD_TAB__OPC_3DNOW) {
983    return decode_3dnow(u);
984  }
985
986  switch (u->le->type) {
987    case UD_TAB__OPC_MOD:
988      /* !11 = 0, 11 = 1 */
989      idx = (MODRM_MOD(modrm(u)) + 1) / 4;
990      break;
991      /* disassembly mode/operand size/address size based tables.
992       * 16 = 0,, 32 = 1, 64 = 2
993       */
994    case UD_TAB__OPC_MODE:
995      idx = u->dis_mode != 64 ? 0 : 1;
996      break;
997    case UD_TAB__OPC_OSIZE:
998      idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
999      break;
1000    case UD_TAB__OPC_ASIZE:
1001      idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1002      break;
1003    case UD_TAB__OPC_X87:
1004      idx = modrm(u) - 0xC0;
1005      break;
1006    case UD_TAB__OPC_VENDOR:
1007      if (u->vendor == UD_VENDOR_ANY) {
1008        /* choose a valid entry */
1009        idx = (u->le->table[idx] != 0) ? 0 : 1;
1010      } else if (u->vendor == UD_VENDOR_AMD) {
1011        idx = 0;
1012      } else {
1013        idx = 1;
1014      }
1015      break;
1016    case UD_TAB__OPC_RM:
1017      idx = MODRM_RM(modrm(u));
1018      break;
1019    case UD_TAB__OPC_REG:
1020      idx = MODRM_REG(modrm(u));
1021      break;
1022    case UD_TAB__OPC_SSE:
1023      return decode_ssepfx(u);
1024    default:
1025      UD_ASSERT(!"not reached");
1026      break;
1027  }
1028
1029  return decode_ext(u, u->le->table[idx]);
1030}
1031
1032
1033static int
1034decode_opcode(struct ud *u)
1035{
1036  uint16_t ptr;
1037  UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1038  ud_inp_next(u);
1039  if (u->error) {
1040    return -1;
1041  }
1042  u->primary_opcode = inp_curr(u);
1043  ptr = u->le->table[inp_curr(u)];
1044  if (ptr & 0x8000) {
1045    u->le = &ud_lookup_table_list[ptr & ~0x8000];
1046    if (u->le->type == UD_TAB__OPC_TABLE) {
1047      return decode_opcode(u);
1048    }
1049  }
1050  return decode_ext(u, ptr);
1051}
1052
1053
1054/* =============================================================================
1055 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1056 * =============================================================================
1057 */
1058unsigned int
1059ud_decode(struct ud *u)
1060{
1061  inp_start(u);
1062  clear_insn(u);
1063  u->le = &ud_lookup_table_list[0];
1064  u->error = decode_prefixes(u) == -1 ||
1065             decode_opcode(u)   == -1 ||
1066             u->error;
1067  /* Handle decode error. */
1068  if (u->error) {
1069    /* clear out the decode data. */
1070    clear_insn(u);
1071    /* mark the sequence of bytes as invalid. */
1072    u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
1073    u->mnemonic = u->itab_entry->mnemonic;
1074  }
1075
1076    /* maybe this stray segment override byte
1077     * should be spewed out?
1078     */
1079    if ( !P_SEG( u->itab_entry->prefix ) &&
1080            u->operand[0].type != UD_OP_MEM &&
1081            u->operand[1].type != UD_OP_MEM )
1082        u->pfx_seg = 0;
1083
1084  u->insn_offset = u->pc; /* set offset of instruction */
1085  u->asm_buf_fill = 0;   /* set translation buffer index to 0 */
1086  u->pc += u->inp_ctr;    /* move program counter by bytes decoded */
1087
1088  /* return number of bytes disassembled. */
1089  return u->inp_ctr;
1090}
1091
1092/*
1093vim: set ts=2 sw=2 expandtab
1094*/
1095