1
2/*---------------------------------------------------------------*/
3/*--- begin                               guest_x86_helpers.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2015 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_emnote.h"
38#include "libvex_guest_x86.h"
39#include "libvex_ir.h"
40#include "libvex.h"
41
42#include "main_util.h"
43#include "main_globals.h"
44#include "guest_generic_bb_to_IR.h"
45#include "guest_x86_defs.h"
46#include "guest_generic_x87.h"
47
48
49/* This file contains helper functions for x86 guest code.
50   Calls to these functions are generated by the back end.
51   These calls are of course in the host machine code and
52   this file will be compiled to host machine code, so that
53   all makes sense.
54
55   Only change the signatures of these helper functions very
56   carefully.  If you change the signature here, you'll have to change
57   the parameters passed to it in the IR calls constructed by
58   guest-x86/toIR.c.
59
60   The convention used is that all functions called from generated
61   code are named x86g_<something>, and any function whose name lacks
62   that prefix is not called from generated code.  Note that some
63   LibVEX_* functions can however be called by VEX's client, but that
64   is not the same as calling them from VEX-generated code.
65*/
66
67
/* Set to 1 to get detailed profiling info about use of the flag
   machinery: per-op counters, periodically dumped by showCounts(). */
#define PROFILE_EFLAGS 0
71
72
73/*---------------------------------------------------------------*/
74/*--- %eflags run-time helpers.                               ---*/
75/*---------------------------------------------------------------*/
76
/* Lookup table for the x86 parity flag: parity_table[b] is
   X86G_CC_MASK_P when the byte b contains an even number of set bits
   (x86 PF is set on even parity of the result's low byte), else 0. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
111
112/* generalised left-shifter */
113inline static Int lshift ( Int x, Int n )
114{
115   if (n >= 0)
116      return (UInt)x << n;
117   else
118      return x >> (-n);
119}
120
121/* identity on ULong */
122static inline ULong idULong ( ULong x )
123{
124   return x;
125}
126
127
/* PREAMBLE: common prologue for every ACTIONS_* flag computation
   below.  Binds the thunk formals (cc_dep1_formal etc., the parameter
   names of x86g_calculate_eflags_all_WRK) to CC_DEP1/CC_DEP2/CC_NDEP
   and sets up DATA_MASK / SIGN_MASK for the given operand width.
   The trailing self-assignments exist only to silence unused-variable
   warnings in expansions that do not use all four names. */
#define PREAMBLE(__data_bits)					\
   /* const */ UInt DATA_MASK 					\
      = __data_bits==8 ? 0xFF 					\
                       : (__data_bits==16 ? 0xFFFF 		\
                                          : 0xFFFFFFFF); 	\
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);	\
   /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
   /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
   /* const */ UInt CC_NDEP = cc_ndep_formal;			\
   /* Four bogus assignments, which hopefully gcc can     */	\
   /* optimise away, and which stop it complaining about  */	\
   /* unused variables.                                   */	\
   SIGN_MASK = SIGN_MASK;					\
   DATA_MASK = DATA_MASK;					\
   CC_DEP2 = CC_DEP2;						\
   CC_NDEP = CC_NDEP;
144
145
146/*-------------------------------------------------------------*/
147
/* ADD: DEP1 = first addend, DEP2 = second addend (res = DEP1 + DEP2).
   Computes all six OSZACP flags from scratch. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL + argR;					\
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
166
167/*-------------------------------------------------------------*/
168
/* SUB/CMP: DEP1 = minuend, DEP2 = subtrahend (res = DEP1 - DEP2).
   CF is the unsigned borrow: DEP1 < DEP2 at the operand width. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL - argR;					\
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res),	 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O; 		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
187
188/*-------------------------------------------------------------*/
189
/* ADC: DEP1 = first addend, DEP2 = (second addend XOR old carry),
   NDEP = old flags (only the C bit is used).  Undoing the XOR
   recovers the true second addend; res = argL + argR + oldC.
   The CF comparison must be <= when the old carry was set. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL + argR) + oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
     else							\
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
212
213/*-------------------------------------------------------------*/
214
/* SBB: DEP1 = minuend, DEP2 = (subtrahend XOR old carry),
   NDEP = old flags (only the C bit is used).  Undoing the XOR
   recovers the true subtrahend; res = argL - argR - oldC.
   The borrow comparison must be <= when the old carry was set. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, oldC, res;		       		\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL - argR) - oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
     else							\
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res), 			\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
237
238/*-------------------------------------------------------------*/
239
/* LOGIC (and/or/xor/etc): DEP1 = the result.  CF, AF and OF are
   always cleared; PF/ZF/SF are derived from the result alone. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = 0;							\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0;							\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     of = 0;							\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
253
254/*-------------------------------------------------------------*/
255
/* INC: DEP1 = the result, NDEP = the old flags.  INC does not modify
   CF, so it is passed through unchanged from NDEP.  OF is set exactly
   when the result equals the most negative value (0x80...0). */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res - 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
273
274/*-------------------------------------------------------------*/
275
/* DEC: DEP1 = the result, NDEP = the old flags.  DEC does not modify
   CF, so it is passed through unchanged from NDEP.  OF is set exactly
   when the result equals the most positive value (0x7F...F). */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     UInt argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res + 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) 					\
          == ((UInt)SIGN_MASK - 1)) << 11;			\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
294
295/*-------------------------------------------------------------*/
296
/* SHL: DEP1 = the final result; DEP2 presumably holds the value
   shifted one place less, since CF comes from its top bit — TODO
   confirm against the thunk construction in guest_x86_toIR.c. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
312
313/*-------------------------------------------------------------*/
314
/* SHR/SAR: DEP1 = the final result; DEP2 presumably holds the value
   shifted one place less, since CF comes from its bottom bit — TODO
   confirm against the thunk construction in guest_x86_toIR.c. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);  					\
   { UInt cf, pf, af, zf, sf, of;				\
     cf = CC_DEP2 & 1;						\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
330
331/*-------------------------------------------------------------*/
332
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags.  Only C and O are recomputed;
   all other flag bits are passed through unchanged from NDEP. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & CC_DEP1)				\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11)));			\
     return fl;							\
   }								\
}
347
348/*-------------------------------------------------------------*/
349
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags.  Only C and O are recomputed;
   all other flag bits are passed through unchanged from NDEP. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { UInt fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
     return fl;							\
   }								\
}
364
365/*-------------------------------------------------------------*/
366
/* Unsigned widening multiply: DEP1, DEP2 = the two multiplicands.
   The product is formed at double width; CF and OF are set exactly
   when the high half of the product is non-zero. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
390
391/*-------------------------------------------------------------*/
392
/* Signed widening multiply: DEP1, DEP2 = the two multiplicands.
   The product is formed at double width; CF and OF are set exactly
   when the high half is not the sign-extension of the low half. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
416
417
418#if PROFILE_EFLAGS
419
/* Profiling state, live only when PROFILE_EFLAGS is set.  initted
   records whether initCounts() has zeroed the tables below. */
static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True once every 2^22 combined calls; rate-limits showCounts(). */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
434
435
436static void showCounts ( void )
437{
438   Int op, co;
439   HChar ch;
440   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
441              n_calc_all, n_calc_cond, n_calc_c);
442
443   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
444              "    S   NS    P   NP    L   NL   LE  NLE\n");
445   vex_printf("     -----------------------------------------------------"
446              "----------------------------------------\n");
447   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
448
449      ch = ' ';
450      if (op > 0 && (op-1) % 3 == 0)
451         ch = 'B';
452      if (op > 0 && (op-1) % 3 == 1)
453         ch = 'W';
454      if (op > 0 && (op-1) % 3 == 2)
455         ch = 'L';
456
457      vex_printf("%2d%c: ", op, ch);
458      vex_printf("%6u ", tabc_slow[op]);
459      vex_printf("%6u ", tabc_fast[op]);
460      for (co = 0; co < 16; co++) {
461         Int n = tab_cond[op][co];
462         if (n >= 1000) {
463            vex_printf(" %3dK", n / 1000);
464         } else
465         if (n >= 0) {
466            vex_printf(" %3d ", n );
467         } else {
468            vex_printf("     ");
469         }
470      }
471      vex_printf("\n");
472   }
473   vex_printf("\n");
474}
475
476static void initCounts ( void )
477{
478   Int op, co;
479   initted = True;
480   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
481      tabc_fast[op] = tabc_slow[op] = 0;
482      for (co = 0; co < 16; co++)
483         tab_cond[op][co] = 0;
484   }
485}
486
487#endif /* PROFILE_EFLAGS */
488
489
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code.
   Each case expands one of the ACTIONS_* macros above; those macros
   refer to the formal parameter names cc_dep1_formal/cc_dep2_formal/
   cc_ndep_formal via PREAMBLE, so do not rename them. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         /* DEP1 already holds the flags verbatim; just mask to OSZACP. */
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
571
572
573/* CALLED FROM GENERATED CODE: CLEAN HELPER */
574/* Calculate all the 6 flags from the supplied thunk parameters. */
575UInt x86g_calculate_eflags_all ( UInt cc_op,
576                                 UInt cc_dep1,
577                                 UInt cc_dep2,
578                                 UInt cc_ndep )
579{
580#  if PROFILE_EFLAGS
581   if (!initted) initCounts();
582   n_calc_all++;
583   if (SHOW_COUNTS_NOW) showCounts();
584#  endif
585   return
586      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
587}
588
589
590/* CALLED FROM GENERATED CODE: CLEAN HELPER */
591/* Calculate just the carry flag from the supplied thunk parameters. */
592VEX_REGPARM(3)
593UInt x86g_calculate_eflags_c ( UInt cc_op,
594                               UInt cc_dep1,
595                               UInt cc_dep2,
596                               UInt cc_ndep )
597{
598#  if PROFILE_EFLAGS
599   if (!initted) initCounts();
600   n_calc_c++;
601   tabc_fast[cc_op]++;
602   if (SHOW_COUNTS_NOW) showCounts();
603#  endif
604
605   /* Fast-case some common ones. */
606   switch (cc_op) {
607      case X86G_CC_OP_LOGICL:
608      case X86G_CC_OP_LOGICW:
609      case X86G_CC_OP_LOGICB:
610         return 0;
611      case X86G_CC_OP_SUBL:
612         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
613                   ? X86G_CC_MASK_C : 0;
614      case X86G_CC_OP_SUBW:
615         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
616                   ? X86G_CC_MASK_C : 0;
617      case X86G_CC_OP_SUBB:
618         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
619                   ? X86G_CC_MASK_C : 0;
620      case X86G_CC_OP_INCL:
621      case X86G_CC_OP_DECL:
622         return cc_ndep & X86G_CC_MASK_C;
623      default:
624         break;
625   }
626
627#  if PROFILE_EFLAGS
628   tabc_fast[cc_op]--;
629   tabc_slow[cc_op]++;
630#  endif
631
632   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
633          & X86G_CC_MASK_C;
634}
635
636
637/* CALLED FROM GENERATED CODE: CLEAN HELPER */
638/* returns 1 or 0 */
639UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
640                                UInt cc_op,
641                                UInt cc_dep1,
642                                UInt cc_dep2,
643                                UInt cc_ndep )
644{
645   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
646                                               cc_dep2, cc_ndep);
647   UInt of,sf,zf,cf,pf;
648   UInt inv = cond & 1;
649
650#  if PROFILE_EFLAGS
651   if (!initted) initCounts();
652   tab_cond[cc_op][cond]++;
653   n_calc_cond++;
654   if (SHOW_COUNTS_NOW) showCounts();
655#  endif
656
657   switch (cond) {
658      case X86CondNO:
659      case X86CondO: /* OF == 1 */
660         of = eflags >> X86G_CC_SHIFT_O;
661         return 1 & (inv ^ of);
662
663      case X86CondNZ:
664      case X86CondZ: /* ZF == 1 */
665         zf = eflags >> X86G_CC_SHIFT_Z;
666         return 1 & (inv ^ zf);
667
668      case X86CondNB:
669      case X86CondB: /* CF == 1 */
670         cf = eflags >> X86G_CC_SHIFT_C;
671         return 1 & (inv ^ cf);
672         break;
673
674      case X86CondNBE:
675      case X86CondBE: /* (CF or ZF) == 1 */
676         cf = eflags >> X86G_CC_SHIFT_C;
677         zf = eflags >> X86G_CC_SHIFT_Z;
678         return 1 & (inv ^ (cf | zf));
679         break;
680
681      case X86CondNS:
682      case X86CondS: /* SF == 1 */
683         sf = eflags >> X86G_CC_SHIFT_S;
684         return 1 & (inv ^ sf);
685
686      case X86CondNP:
687      case X86CondP: /* PF == 1 */
688         pf = eflags >> X86G_CC_SHIFT_P;
689         return 1 & (inv ^ pf);
690
691      case X86CondNL:
692      case X86CondL: /* (SF xor OF) == 1 */
693         sf = eflags >> X86G_CC_SHIFT_S;
694         of = eflags >> X86G_CC_SHIFT_O;
695         return 1 & (inv ^ (sf ^ of));
696         break;
697
698      case X86CondNLE:
699      case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
700         sf = eflags >> X86G_CC_SHIFT_S;
701         of = eflags >> X86G_CC_SHIFT_O;
702         zf = eflags >> X86G_CC_SHIFT_Z;
703         return 1 & (inv ^ ((sf ^ of) | zf));
704         break;
705
706      default:
707         /* shouldn't really make these calls from generated code */
708         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
709                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
710         vpanic("x86g_calculate_condition");
711   }
712}
713
714
715/* VISIBLE TO LIBVEX CLIENT */
716UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
717{
718   UInt eflags = x86g_calculate_eflags_all_WRK(
719                    vex_state->guest_CC_OP,
720                    vex_state->guest_CC_DEP1,
721                    vex_state->guest_CC_DEP2,
722                    vex_state->guest_CC_NDEP
723                 );
724   UInt dflag = vex_state->guest_DFLAG;
725   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
726   if (dflag == 0xFFFFFFFF)
727      eflags |= X86G_CC_MASK_D;
728   if (vex_state->guest_IDFLAG == 1)
729      eflags |= X86G_CC_MASK_ID;
730   if (vex_state->guest_ACFLAG == 1)
731      eflags |= X86G_CC_MASK_AC;
732
733   return eflags;
734}
735
736/* VISIBLE TO LIBVEX CLIENT */
737void
738LibVEX_GuestX86_put_eflags ( UInt eflags,
739                             /*MOD*/VexGuestX86State* vex_state )
740{
741   /* D flag */
742   if (eflags & X86G_CC_MASK_D) {
743      vex_state->guest_DFLAG = 0xFFFFFFFF;
744      eflags &= ~X86G_CC_MASK_D;
745   }
746   else
747      vex_state->guest_DFLAG = 1;
748
749   /* ID flag */
750   if (eflags & X86G_CC_MASK_ID) {
751      vex_state->guest_IDFLAG = 1;
752      eflags &= ~X86G_CC_MASK_ID;
753   }
754   else
755      vex_state->guest_IDFLAG = 0;
756
757   /* AC flag */
758   if (eflags & X86G_CC_MASK_AC) {
759      vex_state->guest_ACFLAG = 1;
760      eflags &= ~X86G_CC_MASK_AC;
761   }
762   else
763      vex_state->guest_ACFLAG = 0;
764
765   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
766                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
767   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
768   vex_state->guest_CC_DEP1 = eflags & cc_mask;
769   vex_state->guest_CC_DEP2 = 0;
770   vex_state->guest_CC_NDEP = 0;
771}
772
773/* VISIBLE TO LIBVEX CLIENT */
774void
775LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
776                              /*MOD*/VexGuestX86State* vex_state )
777{
778   UInt oszacp = x86g_calculate_eflags_all_WRK(
779                    vex_state->guest_CC_OP,
780                    vex_state->guest_CC_DEP1,
781                    vex_state->guest_CC_DEP2,
782                    vex_state->guest_CC_NDEP
783                 );
784   if (new_carry_flag & 1) {
785      oszacp |= X86G_CC_MASK_C;
786   } else {
787      oszacp &= ~X86G_CC_MASK_C;
788   }
789   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
790   vex_state->guest_CC_DEP1 = oszacp;
791   vex_state->guest_CC_DEP2 = 0;
792   vex_state->guest_CC_NDEP = 0;
793}
794
795
796/*---------------------------------------------------------------*/
797/*--- %eflags translation-time function specialisers.         ---*/
798/*--- These help iropt specialise calls the above run-time    ---*/
799/*--- %eflags functions.                                      ---*/
800/*---------------------------------------------------------------*/
801
802/* Used by the optimiser to try specialisations.  Returns an
803   equivalent expression, or NULL if none. */
804
805static inline Bool isU32 ( IRExpr* e, UInt n )
806{
807   return
808      toBool( e->tag == Iex_Const
809              && e->Iex.Const.con->tag == Ico_U32
810              && e->Iex.Const.con->Ico.U32 == n );
811}
812
/* Translation-time specialiser for the x86 %eflags clean-helper
   calls.  Given the name of one of the run-time flag helpers and its
   argument expressions, returns a cheaper, equivalent IR expression,
   or NULL if no specialisation applies.  precedingStmts and
   n_precedingStmts are part of the standard specialiser interface
   but are not consulted here. */
IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   /* args[] is NULL-terminated; count the actual arity. */
   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      /* Args are: cond, cc_op, cc_dep1, cc_dep2, cc_ndep(unused). */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src)
            (the Xor with 1 inverts the 1-bit comparison result) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         /* Only the low 16 bits of the args are significant. */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SZ ^ OF) | ZF, but
            OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         /* The Shl32 by 16 discards the irrelevant upper half, so the
            equality test only sees the 16-bit result. */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 (dep1 is the shift result) */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */
      /* For COPY, cc_dep1 holds the flags themselves, so the relevant
         flag bits are extracted directly by shift-and-mask. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   /* Unknown helper name, or no rule matched: no specialisation. */
   return NULL;
}
1360
1361
1362/*---------------------------------------------------------------*/
1363/*--- Supporting functions for x87 FPU activities.            ---*/
1364/*---------------------------------------------------------------*/
1365
1366static inline Bool host_is_little_endian ( void )
1367{
1368   UInt x = 0x76543210;
1369   UChar* p = (UChar*)(&x);
1370   return toBool(*p == 0x10);
1371}
1372
1373/* 80 and 64-bit floating point formats:
1374
1375   80-bit:
1376
1377    S  0       0-------0      zero
1378    S  0       0X------X      denormals
1379    S  1-7FFE  1X------X      normals (all normals have leading 1)
1380    S  7FFF    10------0      infinity
1381    S  7FFF    10X-----X      snan
1382    S  7FFF    11X-----X      qnan
1383
1384   S is the sign bit.  For runs X----X, at least one of the Xs must be
1385   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
1386   there is an explicitly represented leading 1, and a sign bit,
1387   giving 80 in total.
1388
1389   64-bit avoids the confusion of an explicitly represented leading 1
1390   and so is simpler:
1391
1392    S  0      0------0   zero
1393    S  0      X------X   denormals
1394    S  1-7FE  any        normals
1395    S  7FF    0------0   infinity
1396    S  7FF    0X-----X   snan
1397    S  7FF    1X-----X   qnan
1398
1399   Exponent is 11 bits, fractional part is 52 bits, and there is a
1400   sign bit, giving 64 in total.
1401*/
1402
1403/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1404/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1405UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
1406{
1407   Bool   mantissaIsZero;
1408   Int    bexp;
1409   UChar  sign;
1410   UChar* f64;
1411
1412   vassert(host_is_little_endian());
1413
1414   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1415
1416   f64  = (UChar*)(&dbl);
1417   sign = toUChar( (f64[7] >> 7) & 1 );
1418
1419   /* First off, if the tag indicates the register was empty,
1420      return 1,0,sign,1 */
1421   if (tag == 0) {
1422      /* vex_printf("Empty\n"); */
1423      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
1424                                 | X86G_FC_MASK_C0;
1425   }
1426
1427   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1428   bexp &= 0x7FF;
1429
1430   mantissaIsZero
1431      = toBool(
1432           (f64[6] & 0x0F) == 0
1433           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1434        );
1435
1436   /* If both exponent and mantissa are zero, the value is zero.
1437      Return 1,0,sign,0. */
1438   if (bexp == 0 && mantissaIsZero) {
1439      /* vex_printf("Zero\n"); */
1440      return X86G_FC_MASK_C3 | 0
1441                             | (sign << X86G_FC_SHIFT_C1) | 0;
1442   }
1443
1444   /* If exponent is zero but mantissa isn't, it's a denormal.
1445      Return 1,1,sign,0. */
1446   if (bexp == 0 && !mantissaIsZero) {
1447      /* vex_printf("Denormal\n"); */
1448      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
1449                             | (sign << X86G_FC_SHIFT_C1) | 0;
1450   }
1451
1452   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1453      Return 0,1,sign,1. */
1454   if (bexp == 0x7FF && mantissaIsZero) {
1455      /* vex_printf("Inf\n"); */
1456      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
1457                                 | X86G_FC_MASK_C0;
1458   }
1459
1460   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1461      Return 0,0,sign,1. */
1462   if (bexp == 0x7FF && !mantissaIsZero) {
1463      /* vex_printf("NaN\n"); */
1464      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
1465   }
1466
1467   /* Uh, ok, we give up.  It must be a normal finite number.
1468      Return 0,1,sign,0.
1469   */
1470   /* vex_printf("normal\n"); */
1471   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
1472}
1473
1474
1475/* CALLED FROM GENERATED CODE */
1476/* DIRTY HELPER (reads guest memory) */
1477ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
1478{
1479   ULong f64;
1480   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
1481   return f64;
1482}
1483
1484/* CALLED FROM GENERATED CODE */
1485/* DIRTY HELPER (writes guest memory) */
1486void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
1487{
1488   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
1489}
1490
1491
1492/*----------------------------------------------*/
1493/*--- The exported fns ..                    ---*/
1494/*----------------------------------------------*/
1495
1496/* Layout of the real x87 state. */
1497/* 13 June 05: Fpu_State and auxiliary constants was moved to
1498   g_generic_x87.h */
1499
1500
1501/* CLEAN HELPER */
1502/* fpucw[15:0] contains a x87 native format FPU control word.
1503   Extract from it the required FPROUND value and any resulting
1504   emulation warning, and return (warn << 32) | fpround value.
1505*/
1506ULong x86g_check_fldcw ( UInt fpucw )
1507{
1508   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1509   /* NOTE, encoded exactly as per enum IRRoundingMode. */
1510   UInt rmode = (fpucw >> 10) & 3;
1511
1512   /* Detect any required emulation warnings. */
1513   VexEmNote ew = EmNote_NONE;
1514
1515   if ((fpucw & 0x3F) != 0x3F) {
1516      /* unmasked exceptions! */
1517      ew = EmWarn_X86_x87exns;
1518   }
1519   else
1520   if (((fpucw >> 8) & 3) != 3) {
1521      /* unsupported precision */
1522      ew = EmWarn_X86_x87precision;
1523   }
1524
1525   return (((ULong)ew) << 32) | ((ULong)rmode);
1526}
1527
1528/* CLEAN HELPER */
1529/* Given fpround as an IRRoundingMode value, create a suitable x87
1530   native format FPU control word. */
1531UInt x86g_create_fpucw ( UInt fpround )
1532{
1533   fpround &= 3;
1534   return 0x037F | (fpround << 10);
1535}
1536
1537
1538/* CLEAN HELPER */
1539/* mxcsr[15:0] contains a SSE native format MXCSR value.
1540   Extract from it the required SSEROUND value and any resulting
1541   emulation warning, and return (warn << 32) | sseround value.
1542*/
1543ULong x86g_check_ldmxcsr ( UInt mxcsr )
1544{
1545   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1546   /* NOTE, encoded exactly as per enum IRRoundingMode. */
1547   UInt rmode = (mxcsr >> 13) & 3;
1548
1549   /* Detect any required emulation warnings. */
1550   VexEmNote ew = EmNote_NONE;
1551
1552   if ((mxcsr & 0x1F80) != 0x1F80) {
1553      /* unmasked exceptions! */
1554      ew = EmWarn_X86_sseExns;
1555   }
1556   else
1557   if (mxcsr & (1<<15)) {
1558      /* FZ is set */
1559      ew = EmWarn_X86_fz;
1560   }
1561   else
1562   if (mxcsr & (1<<6)) {
1563      /* DAZ is set */
1564      ew = EmWarn_X86_daz;
1565   }
1566
1567   return (((ULong)ew) << 32) | ((ULong)rmode);
1568}
1569
1570
1571/* CLEAN HELPER */
1572/* Given sseround as an IRRoundingMode value, create a suitable SSE
1573   native format MXCSR value. */
1574UInt x86g_create_mxcsr ( UInt sseround )
1575{
1576   sseround &= 3;
1577   return 0x1F80 | (sseround << 13);
1578}
1579
1580
1581/* CALLED FROM GENERATED CODE */
1582/* DIRTY HELPER (writes guest state) */
1583/* Initialise the x87 FPU state as per 'finit'. */
1584void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1585{
1586   Int i;
1587   gst->guest_FTOP = 0;
1588   for (i = 0; i < 8; i++) {
1589      gst->guest_FPTAG[i] = 0; /* empty */
1590      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1591   }
1592   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1593   gst->guest_FC3210  = 0;
1594}
1595
1596
1597/* This is used to implement both 'frstor' and 'fldenv'.  The latter
1598   appears to differ from the former only in that the 8 FP registers
1599   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;  /* TOP field of status word */
   UInt       tagw    = x87->env[FP_ENV_TAG];               /* native 2-bits-per-reg tag word */
   UInt       fpucw   = x87->env[FP_ENV_CTRL];
   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;     /* C3,C2,C1,C0 bits only */
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags.  Native tag value 3 (binary 11) means
      empty; VEX's per-register byte tags use 0 == empty, 1 == full. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;        /* physical reg backing ST(stno) */
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;            /* low 32 bits: rounding mode */
   ew      = (VexEmNote)(pair >> 32);  /* high 32 bits: warning, if any */

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
1657
1658
1659/* Create an x87 FPU state from the guest state, as close as
1660   we can approximate it. */
1661static
1662void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1663                  /*OUT*/UChar* x87_state )
1664{
1665   Int        i, stno, preg;
1666   UInt       tagw;
1667   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1668   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1669   Fpu_State* x87     = (Fpu_State*)x87_state;
1670   UInt       ftop    = vex_state->guest_FTOP;
1671   UInt       c3210   = vex_state->guest_FC3210;
1672
1673   for (i = 0; i < 14; i++)
1674      x87->env[i] = 0;
1675
1676   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1677   x87->env[FP_ENV_STAT]
1678      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1679   x87->env[FP_ENV_CTRL]
1680      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1681
1682   /* Dump the register stack in ST order. */
1683   tagw = 0;
1684   for (stno = 0; stno < 8; stno++) {
1685      preg = (stno + ftop) & 7;
1686      if (vexTags[preg] == 0) {
1687         /* register is empty */
1688         tagw |= (3 << (2*preg));
1689         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1690                                 &x87->reg[10*stno] );
1691      } else {
1692         /* register is full. */
1693         tagw |= (0 << (2*preg));
1694         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1695                                 &x87->reg[10*stno] );
1696      }
1697   }
1698   x87->env[FP_ENV_TAG] = toUShort(tagw);
1699}
1700
1701
1702/* CALLED FROM GENERATED CODE */
1703/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Write the guest's x87/SSE state as a 512-byte fxsave-format
      image at 'addr'.  First build a classic 108-byte x87 image via
      do_get_x87, then rearrange it into fxsave layout.  Somewhat
      roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160); /* %xmm0..7 live at offset 160 */
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way.  fxsave's FTW is an
      abridged tag: one bit per register, 1 = not-empty (full x87 tag
      != 3). */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order.  Each 80-bit register
      occupies the low 10 bytes of a 16-byte slot; the top 6 bytes of
      each slot are zeroed. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
1791
1792
1793/* CALLED FROM GENERATED CODE */
1794/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   /* Reload guest x87/SSE state from the 512-byte fxsave-format
      image at 'addr'.  Returns an emulation warning if the restored
      FPU control word or MXCSR requests behaviour we can't model
      exactly; an x87 warning takes precedence over an XMM one. */
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160); /* %xmm0..7 live at offset 160 */
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7]: each register is the low 10 bytes of its
      16-byte fxsave slot */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Expand the abridged FTW byte back into a full x87 tag word: a 1
      bit in FTW means the register is not empty (x87 tag 00), a 0
      bit means empty (x87 tag 11). */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
1885
1886
1887/* CALLED FROM GENERATED CODE */
1888/* DIRTY HELPER (reads guest state, writes guest mem) */
1889void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1890{
1891   do_get_x87( gst, (UChar*)addr );
1892}
1893
1894/* CALLED FROM GENERATED CODE */
1895/* DIRTY HELPER (writes guest state, reads guest mem) */
1896VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1897{
1898   return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
1899}
1900
1901/* CALLED FROM GENERATED CODE */
1902/* DIRTY HELPER (reads guest state, writes guest mem) */
1903void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1904{
1905   /* Somewhat roundabout, but at least it's simple. */
1906   Int       i;
1907   UShort*   addrP = (UShort*)addr;
1908   Fpu_State tmp;
1909   do_get_x87( gst, (UChar*)&tmp );
1910   for (i = 0; i < 14; i++)
1911      addrP[i] = tmp.env[i];
1912}
1913
1914/* CALLED FROM GENERATED CODE */
1915/* DIRTY HELPER (writes guest state, reads guest mem) */
1916VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1917{
1918   return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
1919}
1920
/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 save from the supplied VexGuestX86State structure and store the
   result at the given address, which represents a buffer of at least 108
   bytes (classic fsave-format image). */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                               /*OUT*/UChar* x87_state )
{
   /* Pure delegation to the common x87-image writer. */
   do_get_x87 ( vex_state, x87_state );
}
1930
/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 restore from the supplied address (a 108-byte fsave-format
   image) and store the read values, including the register contents,
   into the given VexGuestX86State structure.  Returns an emulation
   warning if the restored control word can't be simulated exactly. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
                                    /*MOD*/VexGuestX86State* vex_state )
{
   /* Pure delegation to the common x87-image reader. */
   return do_put_x87 ( True/*moveRegs*/, x87_state, vex_state );
}
1939
/* VISIBLE TO LIBVEX CLIENT */
/* Return an MXCSR value reconstructed from the guest's SSE rounding
   mode in the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
   return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}
1946
1947/* VISIBLE TO LIBVEX CLIENT */
1948/* Modify the given VexGuestX86State structure according to the passed mxcsr
1949   value. */
1950VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
1951                                      /*MOD*/VexGuestX86State* vex_state)
1952{
1953   ULong w64 = x86g_check_ldmxcsr( mxcsr );
1954   vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
1955   return (VexEmNote)(w64 >> 32);
1956}
1957
1958/*---------------------------------------------------------------*/
1959/*--- Misc integer helpers, including rotates and CPUID.      ---*/
1960/*---------------------------------------------------------------*/
1961
1962/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1963/* Calculate both flags and value result for rotate right
1964   through the carry bit.  Result in low 32 bits,
1965   new flags (OSZACP) in high 32 bits.
1966*/
1967ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1968{
1969   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1970
1971   switch (sz) {
1972      case 4:
1973         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1974         of        = ((arg >> 31) ^ cf) & 1;
1975         while (tempCOUNT > 0) {
1976            tempcf = arg & 1;
1977            arg    = (arg >> 1) | (cf << 31);
1978            cf     = tempcf;
1979            tempCOUNT--;
1980         }
1981         break;
1982      case 2:
1983         while (tempCOUNT >= 17) tempCOUNT -= 17;
1984         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1985         of        = ((arg >> 15) ^ cf) & 1;
1986         while (tempCOUNT > 0) {
1987            tempcf = arg & 1;
1988            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
1989            cf     = tempcf;
1990            tempCOUNT--;
1991         }
1992         break;
1993      case 1:
1994         while (tempCOUNT >= 9) tempCOUNT -= 9;
1995         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1996         of        = ((arg >> 7) ^ cf) & 1;
1997         while (tempCOUNT > 0) {
1998            tempcf = arg & 1;
1999            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
2000            cf     = tempcf;
2001            tempCOUNT--;
2002         }
2003         break;
2004      default:
2005         vpanic("calculate_RCR: invalid size");
2006   }
2007
2008   cf &= 1;
2009   of &= 1;
2010   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2011   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2012
2013   return (((ULong)eflags_in) << 32) | ((ULong)arg);
2014}
2015
2016
2017/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2018/* Calculate both flags and value result for rotate left
2019   through the carry bit.  Result in low 32 bits,
2020   new flags (OSZACP) in high 32 bits.
2021*/
2022ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
2023{
2024   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
2025
2026   switch (sz) {
2027      case 4:
2028         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2029         while (tempCOUNT > 0) {
2030            tempcf = (arg >> 31) & 1;
2031            arg    = (arg << 1) | (cf & 1);
2032            cf     = tempcf;
2033            tempCOUNT--;
2034         }
2035         of = ((arg >> 31) ^ cf) & 1;
2036         break;
2037      case 2:
2038         while (tempCOUNT >= 17) tempCOUNT -= 17;
2039         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2040         while (tempCOUNT > 0) {
2041            tempcf = (arg >> 15) & 1;
2042            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
2043            cf     = tempcf;
2044            tempCOUNT--;
2045         }
2046         of = ((arg >> 15) ^ cf) & 1;
2047         break;
2048      case 1:
2049         while (tempCOUNT >= 9) tempCOUNT -= 9;
2050         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2051         while (tempCOUNT > 0) {
2052            tempcf = (arg >> 7) & 1;
2053            arg    = 0xFF & ((arg << 1) | (cf & 1));
2054            cf     = tempcf;
2055            tempCOUNT--;
2056         }
2057         of = ((arg >> 7) ^ cf) & 1;
2058         break;
2059      default:
2060         vpanic("calculate_RCL: invalid size");
2061   }
2062
2063   cf &= 1;
2064   of &= 1;
2065   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2066   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2067
2068   return (((ULong)eflags_in) << 32) | ((ULong)arg);
2069}
2070
2071
2072/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2073/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
2074   AX value in low half of arg, OSZACP in upper half.
2075   See guest-x86/toIR.c usage point for details.
2076*/
2077static UInt calc_parity_8bit ( UInt w32 ) {
2078   UInt i;
2079   UInt p = 1;
2080   for (i = 0; i < 8; i++)
2081      p ^= (1 & (w32 >> i));
2082   return p;
2083}
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   /* Model the BCD-adjust instructions DAA (0x27), DAS (0x2F),
      AAA (0x37) and AAS (0x3F).  Layout of flags_and_AX: AX in the
      low 16 bits, the OSZACP flags (at their usual eflags bit
      positions) in the upper 16 bits.  The result uses the same
      layout. */
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA: decimal-adjust AL after addition */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         /* low-nibble adjustment */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         /* high-nibble adjustment; note it tests the ORIGINAL AL */
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S, Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS: decimal-adjust AL after subtraction */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         /* low-nibble adjustment */
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         /* high-nibble adjustment; tests the ORIGINAL AL */
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S, Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA: ASCII-adjust AX after addition */
         /* 'nudge' handles the case where AL + 6 itself carries into
            AH. */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS: ASCII-adjust AX after subtraction */
         /* 'nudge' handles the case where AL - 6 itself borrows from
            AH. */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   /* Repack flags and AX in the input layout. */
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
2199
2200UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
2201{
2202   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2203   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2204   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2205   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2206   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2207   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2208   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2209   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2210   UInt result = 0;
2211
2212   switch (opcode) {
2213      case 0xD4: { /* AAM */
2214         r_AH = r_AL / 10;
2215         r_AL = r_AL % 10;
2216         break;
2217      }
2218      case 0xD5: { /* AAD */
2219         r_AL = ((r_AH * 10) + r_AL) & 0xff;
2220         r_AH = 0;
2221         break;
2222      }
2223      default:
2224         vassert(0);
2225   }
2226
2227   r_O = 0; /* let's say (undefined) */
2228   r_C = 0; /* let's say (undefined) */
2229   r_A = 0; /* let's say (undefined) */
2230   r_S = (r_AL & 0x80) ? 1 : 0;
2231   r_Z = (r_AL == 0) ? 1 : 0;
2232   r_P = calc_parity_8bit( r_AL );
2233
2234   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2235            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2236            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2237            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2238            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2239            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2240            | ( (r_AH & 0xFF) << 8 )
2241            | ( (r_AL & 0xFF) << 0 );
2242   return result;
2243}
2244
2245
2246/* CALLED FROM GENERATED CODE */
2247/* DIRTY HELPER (non-referentially-transparent) */
2248/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
   /* On an x86 host, execute a real RDTSC and hand the 64-bit cycle
      counter back to the guest.  On any other host there is nothing
      sensible to return, so yield the constant 1. */
#  if defined(__i386__)
   ULong res;
   /* "=A" binds the edx:eax pair that rdtsc writes. */
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}
2259
2260
2261/* CALLED FROM GENERATED CODE */
2262/* DIRTY HELPER (modifies guest state) */
2263/* Claim to be a P55C (Intel Pentium/MMX) */
2264void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2265{
2266   switch (st->guest_EAX) {
2267      case 0:
2268         st->guest_EAX = 0x1;
2269         st->guest_EBX = 0x756e6547;
2270         st->guest_ECX = 0x6c65746e;
2271         st->guest_EDX = 0x49656e69;
2272         break;
2273      default:
2274         st->guest_EAX = 0x543;
2275         st->guest_EBX = 0x0;
2276         st->guest_ECX = 0x0;
2277         st->guest_EDX = 0x8001bf;
2278         break;
2279   }
2280}
2281
2282/* CALLED FROM GENERATED CODE */
2283/* DIRTY HELPER (modifies guest state) */
2284/* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
2285/* But without 3DNow support (weird, but we really don't support it). */
2286void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
2287{
2288   switch (st->guest_EAX) {
2289      /* vendor ID */
2290      case 0:
2291         st->guest_EAX = 0x1;
2292         st->guest_EBX = 0x68747541;
2293         st->guest_ECX = 0x444d4163;
2294         st->guest_EDX = 0x69746e65;
2295         break;
2296      /* feature bits */
2297      case 1:
2298         st->guest_EAX = 0x621;
2299         st->guest_EBX = 0x0;
2300         st->guest_ECX = 0x0;
2301         st->guest_EDX = 0x183f9ff;
2302         break;
2303      /* Highest Extended Function Supported (0x80000004 brand string) */
2304      case 0x80000000:
2305         st->guest_EAX = 0x80000004;
2306         st->guest_EBX = 0x68747541;
2307         st->guest_ECX = 0x444d4163;
2308         st->guest_EDX = 0x69746e65;
2309         break;
2310      /* Extended Processor Info and Feature Bits */
2311      case 0x80000001:
2312         st->guest_EAX = 0x721;
2313         st->guest_EBX = 0x0;
2314         st->guest_ECX = 0x0;
2315         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
2316         break;
2317      /* Processor Brand String "AMD Athlon(tm) Processor" */
2318      case 0x80000002:
2319         st->guest_EAX = 0x20444d41;
2320         st->guest_EBX = 0x6c687441;
2321         st->guest_ECX = 0x74286e6f;
2322         st->guest_EDX = 0x5020296d;
2323         break;
2324      case 0x80000003:
2325         st->guest_EAX = 0x65636f72;
2326         st->guest_EBX = 0x726f7373;
2327         st->guest_ECX = 0x0;
2328         st->guest_EDX = 0x0;
2329         break;
2330      default:
2331         st->guest_EAX = 0x0;
2332         st->guest_EBX = 0x0;
2333         st->guest_ECX = 0x0;
2334         st->guest_EDX = 0x0;
2335         break;
2336   }
2337}
2338
2339/* CALLED FROM GENERATED CODE */
2340/* DIRTY HELPER (modifies guest state) */
2341/* Claim to be the following SSE1-capable CPU:
2342   vendor_id       : GenuineIntel
2343   cpu family      : 6
2344   model           : 11
2345   model name      : Intel(R) Pentium(R) III CPU family      1133MHz
2346   stepping        : 1
2347   cpu MHz         : 1131.013
2348   cache size      : 512 KB
2349*/
2350void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2351{
2352   switch (st->guest_EAX) {
2353      case 0:
2354         st->guest_EAX = 0x00000002;
2355         st->guest_EBX = 0x756e6547;
2356         st->guest_ECX = 0x6c65746e;
2357         st->guest_EDX = 0x49656e69;
2358         break;
2359      case 1:
2360         st->guest_EAX = 0x000006b1;
2361         st->guest_EBX = 0x00000004;
2362         st->guest_ECX = 0x00000000;
2363         st->guest_EDX = 0x0383fbff;
2364         break;
2365      default:
2366         st->guest_EAX = 0x03020101;
2367         st->guest_EBX = 0x00000000;
2368         st->guest_ECX = 0x00000000;
2369         st->guest_EDX = 0x0c040883;
2370         break;
2371   }
2372}
2373
2374/* Claim to be the following SSE2-capable CPU:
2375   vendor_id    : GenuineIntel
2376   cpu family   : 15
2377   model        : 2
2378   model name   : Intel(R) Pentium(R) 4 CPU 3.00GHz
2379   stepping     : 9
2380   microcode    : 0x17
2381   cpu MHz      : 2992.577
2382   cache size   : 512 KB
2383   flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
2384                  pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
2385                   pebs bts cid xtpr
2386   clflush size : 64
2387   cache_alignment : 128
2388   address sizes : 36 bits physical, 32 bits virtual
2389*/
2390void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2391{
2392   switch (st->guest_EAX) {
2393      case 0:
2394         st->guest_EAX = 0x00000002;
2395         st->guest_EBX = 0x756e6547;
2396         st->guest_ECX = 0x6c65746e;
2397         st->guest_EDX = 0x49656e69;
2398         break;
2399      case 1:
2400         st->guest_EAX = 0x00000f29;
2401         st->guest_EBX = 0x01020809;
2402         st->guest_ECX = 0x00004400;
2403         st->guest_EDX = 0xbfebfbff;
2404         break;
2405      default:
2406         st->guest_EAX = 0x03020101;
2407         st->guest_EBX = 0x00000000;
2408         st->guest_ECX = 0x00000000;
2409         st->guest_EDX = 0x0c040883;
2410         break;
2411   }
2412}
2413
2414/* Claim to be the following SSSE3-capable CPU (2 x ...):
2415   vendor_id       : GenuineIntel
2416   cpu family      : 6
2417   model           : 15
2418   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2419   stepping        : 6
2420   cpu MHz         : 2394.000
2421   cache size      : 4096 KB
2422   physical id     : 0
2423   siblings        : 2
2424   core id         : 0
2425   cpu cores       : 2
2426   fpu             : yes
2427   fpu_exception   : yes
2428   cpuid level     : 10
2429   wp              : yes
2430   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2431                     mtrr pge mca cmov pat pse36 clflush dts acpi
2432                     mmx fxsr sse sse2 ss ht tm syscall nx lm
2433                     constant_tsc pni monitor ds_cpl vmx est tm2
2434                     cx16 xtpr lahf_lm
2435   bogomips        : 4798.78
2436   clflush size    : 64
2437   cache_alignment : 64
2438   address sizes   : 36 bits physical, 48 bits virtual
2439   power management:
2440*/
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)               \
      do { st->guest_EAX = (UInt)(_a);        \
           st->guest_EBX = (UInt)(_b);        \
           st->guest_ECX = (UInt)(_c);        \
           st->guest_EDX = (UInt)(_d);        \
      } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         /* max leaf + "GenuineIntel" vendor string */
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         /* signature, misc info and feature bits */
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         /* cache and TLB descriptors */
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         /* processor serial number (unsupported) */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         /* deterministic cache parameters; sub-leaf selected by ECX */
         switch (st->guest_ECX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         /* MONITOR/MWAIT parameters */
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         /* thermal and power management */
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         /* structured extended features (none) */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      /* Any leaf we don't otherwise model also lands here (see the
         'default' below), mimicking real hardware's behaviour of
         returning the highest supported basic leaf's data. */
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         /* highest extended leaf */
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         /* extended features */
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      /* 0x80000002..4: brand string
         "Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz" */
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         /* extended L2 cache info */
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         /* physical/virtual address sizes */
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}
2527
2528
2529/* CALLED FROM GENERATED CODE */
2530/* DIRTY HELPER (non-referentially-transparent) */
2531/* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
   /* On an x86 host, perform a real port read of 'sz' bytes from
      'portno' and return the value zero-extended to 32 bits.  On any
      other host there is no port to read, so return 0.  An
      unrecognised 'sz' also yields 0. */
#  if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   /* Each case zeroes %eax first so the unread high bytes of r are
      well-defined. */
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break; /* invalid size; fall through and return 0 */
   }
   return r;
#  else
   return 0;
#  endif
}
2558
2559
2560/* CALLED FROM GENERATED CODE */
2561/* DIRTY HELPER (non-referentially-transparent) */
2562/* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   /* x86 I/O port space is only 16 bits wide. */
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
	 break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
	 break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
	 break;
      default:
         /* Unsupported size: silently ignore. */
         break;
   }
#  else
   /* do nothing */
#  endif
}
2587
2588/* CALLED FROM GENERATED CODE */
2589/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, write an all-zeroes 6-byte
   descriptor instead of running the real instruction. */
2591/* op = 0: call the native SGDT instruction.
2592   op = 1: call the native SIDT instruction.
2593*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   /* Run the real instruction, which stores a 6-byte
      pseudo-descriptor (16-bit limit + 32-bit base) at 'address'. */
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* No native SGDT/SIDT available: fake an all-zeroes 6-byte
      descriptor instead. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}
2612
2613/*---------------------------------------------------------------*/
2614/*--- Helpers for MMX/SSE/SSE2.                               ---*/
2615/*---------------------------------------------------------------*/
2616
2617static inline UChar abdU8 ( UChar xx, UChar yy ) {
2618   return toUChar(xx>yy ? xx-yy : yy-xx);
2619}
2620
2621static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2622   return (((ULong)w1) << 32) | ((ULong)w0);
2623}
2624
2625static inline UShort sel16x4_3 ( ULong w64 ) {
2626   UInt hi32 = toUInt(w64 >> 32);
2627   return toUShort(hi32 >> 16);
2628}
2629static inline UShort sel16x4_2 ( ULong w64 ) {
2630   UInt hi32 = toUInt(w64 >> 32);
2631   return toUShort(hi32);
2632}
2633static inline UShort sel16x4_1 ( ULong w64 ) {
2634   UInt lo32 = toUInt(w64);
2635   return toUShort(lo32 >> 16);
2636}
2637static inline UShort sel16x4_0 ( ULong w64 ) {
2638   UInt lo32 = toUInt(w64);
2639   return toUShort(lo32);
2640}
2641
2642static inline UChar sel8x8_7 ( ULong w64 ) {
2643   UInt hi32 = toUInt(w64 >> 32);
2644   return toUChar(hi32 >> 24);
2645}
2646static inline UChar sel8x8_6 ( ULong w64 ) {
2647   UInt hi32 = toUInt(w64 >> 32);
2648   return toUChar(hi32 >> 16);
2649}
2650static inline UChar sel8x8_5 ( ULong w64 ) {
2651   UInt hi32 = toUInt(w64 >> 32);
2652   return toUChar(hi32 >> 8);
2653}
2654static inline UChar sel8x8_4 ( ULong w64 ) {
2655   UInt hi32 = toUInt(w64 >> 32);
2656   return toUChar(hi32 >> 0);
2657}
2658static inline UChar sel8x8_3 ( ULong w64 ) {
2659   UInt lo32 = toUInt(w64);
2660   return toUChar(lo32 >> 24);
2661}
2662static inline UChar sel8x8_2 ( ULong w64 ) {
2663   UInt lo32 = toUInt(w64);
2664   return toUChar(lo32 >> 16);
2665}
2666static inline UChar sel8x8_1 ( ULong w64 ) {
2667   UInt lo32 = toUInt(w64);
2668   return toUChar(lo32 >> 8);
2669}
2670static inline UChar sel8x8_0 ( ULong w64 ) {
2671   UInt lo32 = toUInt(w64);
2672   return toUChar(lo32 >> 0);
2673}
2674
2675/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2676ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2677{
2678   return
2679      mk32x2(
2680         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2681            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2682         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2683            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2684      );
2685}
2686
2687/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2688ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2689{
2690   UInt t = 0;
2691   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2692   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2693   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2694   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2695   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2696   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2697   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2698   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2699   t &= 0xFFFF;
2700   return (ULong)t;
2701}
2702
2703
2704/*---------------------------------------------------------------*/
2705/*--- Helpers for dealing with segment overrides.             ---*/
2706/*---------------------------------------------------------------*/
2707
2708static inline
2709UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2710{
2711   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2712   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2713   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2714   return (hi << 24) | (mid << 16) | lo;
2715}
2716
2717static inline
2718UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2719{
2720    UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2721    UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2722    UInt limit = (hi << 16) | lo;
2723    if (ent->LdtEnt.Bits.Granularity)
2724       limit = (limit << 12) | 0xFFF;
2725    return limit;
2726}
2727
2728/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2729ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2730                              UInt seg_selector, UInt virtual_addr )
2731{
2732   UInt tiBit, base, limit;
2733   VexGuestX86SegDescr* the_descrs;
2734
2735   Bool verboze = False;
2736
2737   /* If this isn't true, we're in Big Trouble. */
2738   vassert(8 == sizeof(VexGuestX86SegDescr));
2739
2740   if (verboze)
2741      vex_printf("x86h_use_seg_selector: "
2742                 "seg_selector = 0x%x, vaddr = 0x%x\n",
2743                 seg_selector, virtual_addr);
2744
2745   /* Check for wildly invalid selector. */
2746   if (seg_selector & ~0xFFFF)
2747      goto bad;
2748
2749   seg_selector &= 0x0000FFFF;
2750
2751   /* Sanity check the segment selector.  Ensure that RPL=11b (least
2752      privilege).  This forms the bottom 2 bits of the selector. */
2753   if ((seg_selector & 3) != 3)
2754      goto bad;
2755
2756   /* Extract the TI bit (0 means GDT, 1 means LDT) */
2757   tiBit = (seg_selector >> 2) & 1;
2758
2759   /* Convert the segment selector onto a table index */
2760   seg_selector >>= 3;
2761   vassert(seg_selector >= 0 && seg_selector < 8192);
2762
2763   if (tiBit == 0) {
2764
2765      /* GDT access. */
2766      /* Do we actually have a GDT to look at? */
2767      if (gdt == 0)
2768         goto bad;
2769
2770      /* Check for access to non-existent entry. */
2771      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2772         goto bad;
2773
2774      the_descrs = (VexGuestX86SegDescr*)gdt;
2775      base  = get_segdescr_base (&the_descrs[seg_selector]);
2776      limit = get_segdescr_limit(&the_descrs[seg_selector]);
2777
2778   } else {
2779
2780      /* All the same stuff, except for the LDT. */
2781      if (ldt == 0)
2782         goto bad;
2783
2784      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2785         goto bad;
2786
2787      the_descrs = (VexGuestX86SegDescr*)ldt;
2788      base  = get_segdescr_base (&the_descrs[seg_selector]);
2789      limit = get_segdescr_limit(&the_descrs[seg_selector]);
2790
2791   }
2792
2793   /* Do the limit check.  Note, this check is just slightly too
2794      slack.  Really it should be "if (virtual_addr + size - 1 >=
2795      limit)," but we don't have the size info to hand.  Getting it
2796      could be significantly complex.  */
2797   if (virtual_addr >= limit)
2798      goto bad;
2799
2800   if (verboze)
2801      vex_printf("x86h_use_seg_selector: "
2802                 "base = 0x%x, addr = 0x%x\n",
2803                 base, base + virtual_addr);
2804
2805   /* High 32 bits are zero, indicating success. */
2806   return (ULong)( ((UInt)virtual_addr) + base );
2807
2808 bad:
2809   return 1ULL << 32;
2810}
2811
2812
2813/*---------------------------------------------------------------*/
2814/*--- Helpers for dealing with, and describing,               ---*/
2815/*--- guest state as a whole.                                 ---*/
2816/*---------------------------------------------------------------*/
2817
2818/* Initialise the entire x86 guest state. */
2819/* VISIBLE TO LIBVEX CLIENT */
2820void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2821{
2822   vex_state->host_EvC_FAILADDR = 0;
2823   vex_state->host_EvC_COUNTER = 0;
2824
2825   vex_state->guest_EAX = 0;
2826   vex_state->guest_ECX = 0;
2827   vex_state->guest_EDX = 0;
2828   vex_state->guest_EBX = 0;
2829   vex_state->guest_ESP = 0;
2830   vex_state->guest_EBP = 0;
2831   vex_state->guest_ESI = 0;
2832   vex_state->guest_EDI = 0;
2833
2834   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
2835   vex_state->guest_CC_DEP1 = 0;
2836   vex_state->guest_CC_DEP2 = 0;
2837   vex_state->guest_CC_NDEP = 0;
2838   vex_state->guest_DFLAG   = 1; /* forwards */
2839   vex_state->guest_IDFLAG  = 0;
2840   vex_state->guest_ACFLAG  = 0;
2841
2842   vex_state->guest_EIP = 0;
2843
2844   /* Initialise the simulated FPU */
2845   x86g_dirtyhelper_FINIT( vex_state );
2846
2847   /* Initialse the SSE state. */
2848#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2849
2850   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2851   SSEZERO(vex_state->guest_XMM0);
2852   SSEZERO(vex_state->guest_XMM1);
2853   SSEZERO(vex_state->guest_XMM2);
2854   SSEZERO(vex_state->guest_XMM3);
2855   SSEZERO(vex_state->guest_XMM4);
2856   SSEZERO(vex_state->guest_XMM5);
2857   SSEZERO(vex_state->guest_XMM6);
2858   SSEZERO(vex_state->guest_XMM7);
2859
2860#  undef SSEZERO
2861
2862   vex_state->guest_CS  = 0;
2863   vex_state->guest_DS  = 0;
2864   vex_state->guest_ES  = 0;
2865   vex_state->guest_FS  = 0;
2866   vex_state->guest_GS  = 0;
2867   vex_state->guest_SS  = 0;
2868   vex_state->guest_LDT = 0;
2869   vex_state->guest_GDT = 0;
2870
2871   vex_state->guest_EMNOTE = EmNote_NONE;
2872
2873   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2874   vex_state->guest_CMSTART = 0;
2875   vex_state->guest_CMLEN   = 0;
2876
2877   vex_state->guest_NRADDR   = 0;
2878   vex_state->guest_SC_CLASS = 0;
2879   vex_state->guest_IP_AT_SYSCALL = 0;
2880
2881   vex_state->padding1 = 0;
2882}
2883
2884
2885/* Figure out if any part of the guest state contained in minoff
2886   .. maxoff requires precise memory exceptions.  If in doubt return
2887   True (but this generates significantly slower code).
2888
2889   By default we enforce precise exns for guest %ESP, %EBP and %EIP
2890   only.  These are the minimum needed to extract correct stack
2891   backtraces from x86 code.
2892
2893   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2894*/
2895Bool guest_x86_state_requires_precise_mem_exns (
2896        Int minoff, Int maxoff, VexRegisterUpdates pxControl
2897     )
2898{
2899   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2900   Int ebp_max = ebp_min + 4 - 1;
2901   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2902   Int esp_max = esp_min + 4 - 1;
2903   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2904   Int eip_max = eip_min + 4 - 1;
2905
2906   if (maxoff < esp_min || minoff > esp_max) {
2907      /* no overlap with esp */
2908      if (pxControl == VexRegUpdSpAtMemAccess)
2909         return False; // We only need to check stack pointer.
2910   } else {
2911      return True;
2912   }
2913
2914   if (maxoff < ebp_min || minoff > ebp_max) {
2915      /* no overlap with ebp */
2916   } else {
2917      return True;
2918   }
2919
2920   if (maxoff < eip_min || minoff > eip_max) {
2921      /* no overlap with eip */
2922   } else {
2923      return True;
2924   }
2925
2926   return False;
2927}
2928
2929
/* Expand to an { offset, size } pair describing 'field' within
   VexGuestX86State; used to populate alwaysDefd below. */
#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

/* Layout description of the x86 guest state, consumed by
   instrumenting tools. */
VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'.  NOTE: must equal the number of
             entries in alwaysDefd below (indices 0 .. 23). */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
2986
2987
2988/*---------------------------------------------------------------*/
2989/*--- end                                 guest_x86_helpers.c ---*/
2990/*---------------------------------------------------------------*/
2991