1
2/*---------------------------------------------------------------*/
3/*--- begin                               guest_x86_helpers.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_emnote.h"
38#include "libvex_guest_x86.h"
39#include "libvex_ir.h"
40#include "libvex.h"
41
42#include "main_util.h"
43#include "main_globals.h"
44#include "guest_generic_bb_to_IR.h"
45#include "guest_x86_defs.h"
46#include "guest_generic_x87.h"
47
48
49/* This file contains helper functions for x86 guest code.
50   Calls to these functions are generated by the back end.
51   These calls are of course in the host machine code and
52   this file will be compiled to host machine code, so that
53   all makes sense.
54
55   Only change the signatures of these helper functions very
56   carefully.  If you change the signature here, you'll have to change
57   the parameters passed to it in the IR calls constructed by
58   guest-x86/toIR.c.
59
60   The convention used is that all functions called from generated
61   code are named x86g_<something>, and any function whose name lacks
62   that prefix is not called from generated code.  Note that some
63   LibVEX_* functions can however be called by VEX's client, but that
64   is not the same as calling them from VEX-generated code.
65*/
66
67
68/* Set to 1 to get detailed profiling info about use of the flag
69   machinery. */
70#define PROFILE_EFLAGS 0
71
72
73/*---------------------------------------------------------------*/
74/*--- %eflags run-time helpers.                               ---*/
75/*---------------------------------------------------------------*/
76
77static const UChar parity_table[256] = {
78    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
79    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
80    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
81    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
82    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
83    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
84    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
85    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
86    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
87    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
88    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
89    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
90    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
91    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
92    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
93    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
94    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
95    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
96    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
97    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
98    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
99    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
100    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
101    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
102    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
103    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
104    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
105    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
106    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
107    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
108    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
109    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
110};
111
112/* generalised left-shifter */
113inline static Int lshift ( Int x, Int n )
114{
115   if (n >= 0)
116      return (UInt)x << n;
117   else
118      return x >> (-n);
119}
120
121/* identity on ULong */
122static inline ULong idULong ( ULong x )
123{
124   return x;
125}
126
127
128#define PREAMBLE(__data_bits)					\
129   /* const */ UInt DATA_MASK 					\
130      = __data_bits==8 ? 0xFF 					\
131                       : (__data_bits==16 ? 0xFFFF 		\
132                                          : 0xFFFFFFFF); 	\
133   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);	\
134   /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
135   /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
136   /* const */ UInt CC_NDEP = cc_ndep_formal;			\
137   /* Four bogus assignments, which hopefully gcc can     */	\
138   /* optimise away, and which stop it complaining about  */	\
139   /* unused variables.                                   */	\
140   SIGN_MASK = SIGN_MASK;					\
141   DATA_MASK = DATA_MASK;					\
142   CC_DEP2 = CC_DEP2;						\
143   CC_NDEP = CC_NDEP;
144
145
146/*-------------------------------------------------------------*/
147
148#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
149{								\
150   PREAMBLE(DATA_BITS);						\
151   { UInt cf, pf, af, zf, sf, of;				\
152     UInt argL, argR, res;					\
153     argL = CC_DEP1;						\
154     argR = CC_DEP2;						\
155     res  = argL + argR;					\
156     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
157     pf = parity_table[(UChar)res];				\
158     af = (res ^ argL ^ argR) & 0x10;				\
159     zf = ((DATA_UTYPE)res == 0) << 6;				\
160     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
161     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
162                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
163     return cf | pf | af | zf | sf | of;			\
164   }								\
165}
166
167/*-------------------------------------------------------------*/
168
169#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
170{								\
171   PREAMBLE(DATA_BITS);						\
172   { UInt cf, pf, af, zf, sf, of;				\
173     UInt argL, argR, res;					\
174     argL = CC_DEP1;						\
175     argR = CC_DEP2;						\
176     res  = argL - argR;					\
177     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
178     pf = parity_table[(UChar)res];				\
179     af = (res ^ argL ^ argR) & 0x10;				\
180     zf = ((DATA_UTYPE)res == 0) << 6;				\
181     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
182     of = lshift((argL ^ argR) & (argL ^ res),	 		\
183                 12 - DATA_BITS) & X86G_CC_MASK_O; 		\
184     return cf | pf | af | zf | sf | of;			\
185   }								\
186}
187
188/*-------------------------------------------------------------*/
189
190#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
191{								\
192   PREAMBLE(DATA_BITS);						\
193   { UInt cf, pf, af, zf, sf, of;				\
194     UInt argL, argR, oldC, res;		       		\
195     oldC = CC_NDEP & X86G_CC_MASK_C;				\
196     argL = CC_DEP1;						\
197     argR = CC_DEP2 ^ oldC;	       				\
198     res  = (argL + argR) + oldC;				\
199     if (oldC)							\
200        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
201     else							\
202        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
203     pf = parity_table[(UChar)res];				\
204     af = (res ^ argL ^ argR) & 0x10;				\
205     zf = ((DATA_UTYPE)res == 0) << 6;				\
206     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
207     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
208                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
209     return cf | pf | af | zf | sf | of;			\
210   }								\
211}
212
213/*-------------------------------------------------------------*/
214
215#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
216{								\
217   PREAMBLE(DATA_BITS);						\
218   { UInt cf, pf, af, zf, sf, of;				\
219     UInt argL, argR, oldC, res;		       		\
220     oldC = CC_NDEP & X86G_CC_MASK_C;				\
221     argL = CC_DEP1;						\
222     argR = CC_DEP2 ^ oldC;	       				\
223     res  = (argL - argR) - oldC;				\
224     if (oldC)							\
225        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
226     else							\
227        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
228     pf = parity_table[(UChar)res];				\
229     af = (res ^ argL ^ argR) & 0x10;				\
230     zf = ((DATA_UTYPE)res == 0) << 6;				\
231     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
232     of = lshift((argL ^ argR) & (argL ^ res), 			\
233                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
234     return cf | pf | af | zf | sf | of;			\
235   }								\
236}
237
238/*-------------------------------------------------------------*/
239
240#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
241{								\
242   PREAMBLE(DATA_BITS);						\
243   { UInt cf, pf, af, zf, sf, of;				\
244     cf = 0;							\
245     pf = parity_table[(UChar)CC_DEP1];				\
246     af = 0;							\
247     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
248     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
249     of = 0;							\
250     return cf | pf | af | zf | sf | of;			\
251   }								\
252}
253
254/*-------------------------------------------------------------*/
255
256#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
257{								\
258   PREAMBLE(DATA_BITS);						\
259   { UInt cf, pf, af, zf, sf, of;				\
260     UInt argL, argR, res;					\
261     res  = CC_DEP1;						\
262     argL = res - 1;						\
263     argR = 1;							\
264     cf = CC_NDEP & X86G_CC_MASK_C;				\
265     pf = parity_table[(UChar)res];				\
266     af = (res ^ argL ^ argR) & 0x10;				\
267     zf = ((DATA_UTYPE)res == 0) << 6;				\
268     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
269     of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
270     return cf | pf | af | zf | sf | of;			\
271   }								\
272}
273
274/*-------------------------------------------------------------*/
275
276#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
277{								\
278   PREAMBLE(DATA_BITS);						\
279   { UInt cf, pf, af, zf, sf, of;				\
280     UInt argL, argR, res;					\
281     res  = CC_DEP1;						\
282     argL = res + 1;						\
283     argR = 1;							\
284     cf = CC_NDEP & X86G_CC_MASK_C;				\
285     pf = parity_table[(UChar)res];				\
286     af = (res ^ argL ^ argR) & 0x10;				\
287     zf = ((DATA_UTYPE)res == 0) << 6;				\
288     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
289     of = ((res & DATA_MASK) 					\
290          == ((UInt)SIGN_MASK - 1)) << 11;			\
291     return cf | pf | af | zf | sf | of;			\
292   }								\
293}
294
295/*-------------------------------------------------------------*/
296
297#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
298{								\
299   PREAMBLE(DATA_BITS);						\
300   { UInt cf, pf, af, zf, sf, of;				\
301     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
302     pf = parity_table[(UChar)CC_DEP1];				\
303     af = 0; /* undefined */					\
304     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
305     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
306     /* of is defined if shift count == 1 */			\
307     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
308          & X86G_CC_MASK_O;					\
309     return cf | pf | af | zf | sf | of;			\
310   }								\
311}
312
313/*-------------------------------------------------------------*/
314
315#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
316{								\
317   PREAMBLE(DATA_BITS);  					\
318   { UInt cf, pf, af, zf, sf, of;				\
319     cf = CC_DEP2 & 1;						\
320     pf = parity_table[(UChar)CC_DEP1];				\
321     af = 0; /* undefined */					\
322     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
323     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
324     /* of is defined if shift count == 1 */			\
325     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
326          & X86G_CC_MASK_O;					\
327     return cf | pf | af | zf | sf | of;			\
328   }								\
329}
330
331/*-------------------------------------------------------------*/
332
333/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
334/* DEP1 = result, NDEP = old flags */
335#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
336{								\
337   PREAMBLE(DATA_BITS);						\
338   { UInt fl 							\
339        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
340          | (X86G_CC_MASK_C & CC_DEP1)				\
341          | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
342                                      11-(DATA_BITS-1)) 	\
343                     ^ lshift(CC_DEP1, 11)));			\
344     return fl;							\
345   }								\
346}
347
348/*-------------------------------------------------------------*/
349
350/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
351/* DEP1 = result, NDEP = old flags */
352#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
353{								\
354   PREAMBLE(DATA_BITS);						\
355   { UInt fl 							\
356        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
357          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
358          | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
359                                      11-(DATA_BITS-1)) 	\
360                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
361     return fl;							\
362   }								\
363}
364
365/*-------------------------------------------------------------*/
366
367#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
368                                DATA_U2TYPE, NARROWto2U)        \
369{                                                               \
370   PREAMBLE(DATA_BITS);                                         \
371   { UInt cf, pf, af, zf, sf, of;                               \
372     DATA_UTYPE  hi;                                            \
373     DATA_UTYPE  lo                                             \
374        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
375                     * ((DATA_UTYPE)CC_DEP2) );                 \
376     DATA_U2TYPE rr                                             \
377        = NARROWto2U(                                           \
378             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
379             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
380     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
381     cf = (hi != 0);                                            \
382     pf = parity_table[(UChar)lo];                              \
383     af = 0; /* undefined */                                    \
384     zf = (lo == 0) << 6;                                       \
385     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
386     of = cf << 11;                                             \
387     return cf | pf | af | zf | sf | of;                        \
388   }								\
389}
390
391/*-------------------------------------------------------------*/
392
393#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
394                                DATA_S2TYPE, NARROWto2S)        \
395{                                                               \
396   PREAMBLE(DATA_BITS);                                         \
397   { UInt cf, pf, af, zf, sf, of;                               \
398     DATA_STYPE  hi;                                            \
399     DATA_STYPE  lo                                             \
400        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
401                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
402     DATA_S2TYPE rr                                             \
403        = NARROWto2S(                                           \
404             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
405             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
406     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
407     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
408     pf = parity_table[(UChar)lo];                              \
409     af = 0; /* undefined */                                    \
410     zf = (lo == 0) << 6;                                       \
411     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
412     of = cf << 11;                                             \
413     return cf | pf | af | zf | sf | of;                        \
414   }								\
415}
416
417
418#if PROFILE_EFLAGS
419
420static Bool initted     = False;
421
422/* C flag, fast route */
423static UInt tabc_fast[X86G_CC_OP_NUMBER];
424/* C flag, slow route */
425static UInt tabc_slow[X86G_CC_OP_NUMBER];
426/* table for calculate_cond */
427static UInt tab_cond[X86G_CC_OP_NUMBER][16];
428/* total entry counts for calc_all, calc_c, calc_cond. */
429static UInt n_calc_all  = 0;
430static UInt n_calc_c    = 0;
431static UInt n_calc_cond = 0;
432
433#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
434
435
436static void showCounts ( void )
437{
438   Int op, co;
439   HChar ch;
440   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
441              n_calc_all, n_calc_cond, n_calc_c);
442
443   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
444              "    S   NS    P   NP    L   NL   LE  NLE\n");
445   vex_printf("     -----------------------------------------------------"
446              "----------------------------------------\n");
447   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
448
449      ch = ' ';
450      if (op > 0 && (op-1) % 3 == 0)
451         ch = 'B';
452      if (op > 0 && (op-1) % 3 == 1)
453         ch = 'W';
454      if (op > 0 && (op-1) % 3 == 2)
455         ch = 'L';
456
457      vex_printf("%2d%c: ", op, ch);
458      vex_printf("%6u ", tabc_slow[op]);
459      vex_printf("%6u ", tabc_fast[op]);
460      for (co = 0; co < 16; co++) {
461         Int n = tab_cond[op][co];
462         if (n >= 1000) {
463            vex_printf(" %3dK", n / 1000);
464         } else
465         if (n >= 0) {
466            vex_printf(" %3d ", n );
467         } else {
468            vex_printf("     ");
469         }
470      }
471      vex_printf("\n");
472   }
473   vex_printf("\n");
474}
475
476static void initCounts ( void )
477{
478   Int op, co;
479   initted = True;
480   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
481      tabc_fast[op] = tabc_slow[op] = 0;
482      for (co = 0; co < 16; co++)
483         tab_cond[op][co] = 0;
484   }
485}
486
487#endif /* PROFILE_EFLAGS */
488
489
490/* CALLED FROM GENERATED CODE: CLEAN HELPER */
491/* Calculate all the 6 flags from the supplied thunk parameters.
492   Worker function, not directly called from generated code. */
493static
494UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
495                                     UInt cc_dep1_formal,
496                                     UInt cc_dep2_formal,
497                                     UInt cc_ndep_formal )
498{
499   switch (cc_op) {
500      case X86G_CC_OP_COPY:
501         return cc_dep1_formal
502                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
503                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);
504
505      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
506      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
507      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
508
509      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
510      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
511      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
512
513      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
514      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
515      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
516
517      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
518      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
519      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
520
521      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
522      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
523      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
524
525      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
526      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
527      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
528
529      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
530      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
531      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
532
533      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
534      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
535      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
536
537      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
538      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
539      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
540
541      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
542      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
543      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
544
545      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
546      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
547      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
548
549      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
550                                                UShort, toUShort );
551      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
552                                                UInt,   toUInt );
553      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
554                                                ULong,  idULong );
555
556      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
557                                                Short,  toUShort );
558      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
559                                                Int,    toUInt   );
560      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
561                                                Long,   idULong );
562
563      default:
564         /* shouldn't really make these calls from generated code */
565         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
566                    "( %u, 0x%x, 0x%x, 0x%x )\n",
567                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
568         vpanic("x86g_calculate_eflags_all_WRK(X86)");
569   }
570}
571
572
573/* CALLED FROM GENERATED CODE: CLEAN HELPER */
574/* Calculate all the 6 flags from the supplied thunk parameters. */
575UInt x86g_calculate_eflags_all ( UInt cc_op,
576                                 UInt cc_dep1,
577                                 UInt cc_dep2,
578                                 UInt cc_ndep )
579{
580#  if PROFILE_EFLAGS
581   if (!initted) initCounts();
582   n_calc_all++;
583   if (SHOW_COUNTS_NOW) showCounts();
584#  endif
585   return
586      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
587}
588
589
590/* CALLED FROM GENERATED CODE: CLEAN HELPER */
591/* Calculate just the carry flag from the supplied thunk parameters. */
592VEX_REGPARM(3)
593UInt x86g_calculate_eflags_c ( UInt cc_op,
594                               UInt cc_dep1,
595                               UInt cc_dep2,
596                               UInt cc_ndep )
597{
598#  if PROFILE_EFLAGS
599   if (!initted) initCounts();
600   n_calc_c++;
601   tabc_fast[cc_op]++;
602   if (SHOW_COUNTS_NOW) showCounts();
603#  endif
604
605   /* Fast-case some common ones. */
606   switch (cc_op) {
607      case X86G_CC_OP_LOGICL:
608      case X86G_CC_OP_LOGICW:
609      case X86G_CC_OP_LOGICB:
610         return 0;
611      case X86G_CC_OP_SUBL:
612         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
613                   ? X86G_CC_MASK_C : 0;
614      case X86G_CC_OP_SUBW:
615         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
616                   ? X86G_CC_MASK_C : 0;
617      case X86G_CC_OP_SUBB:
618         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
619                   ? X86G_CC_MASK_C : 0;
620      case X86G_CC_OP_INCL:
621      case X86G_CC_OP_DECL:
622         return cc_ndep & X86G_CC_MASK_C;
623      default:
624         break;
625   }
626
627#  if PROFILE_EFLAGS
628   tabc_fast[cc_op]--;
629   tabc_slow[cc_op]++;
630#  endif
631
632   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
633          & X86G_CC_MASK_C;
634}
635
636
637/* CALLED FROM GENERATED CODE: CLEAN HELPER */
638/* returns 1 or 0 */
639UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
640                                UInt cc_op,
641                                UInt cc_dep1,
642                                UInt cc_dep2,
643                                UInt cc_ndep )
644{
645   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
646                                               cc_dep2, cc_ndep);
647   UInt of,sf,zf,cf,pf;
648   UInt inv = cond & 1;
649
650#  if PROFILE_EFLAGS
651   if (!initted) initCounts();
652   tab_cond[cc_op][cond]++;
653   n_calc_cond++;
654   if (SHOW_COUNTS_NOW) showCounts();
655#  endif
656
657   switch (cond) {
658      case X86CondNO:
659      case X86CondO: /* OF == 1 */
660         of = eflags >> X86G_CC_SHIFT_O;
661         return 1 & (inv ^ of);
662
663      case X86CondNZ:
664      case X86CondZ: /* ZF == 1 */
665         zf = eflags >> X86G_CC_SHIFT_Z;
666         return 1 & (inv ^ zf);
667
668      case X86CondNB:
669      case X86CondB: /* CF == 1 */
670         cf = eflags >> X86G_CC_SHIFT_C;
671         return 1 & (inv ^ cf);
672         break;
673
674      case X86CondNBE:
675      case X86CondBE: /* (CF or ZF) == 1 */
676         cf = eflags >> X86G_CC_SHIFT_C;
677         zf = eflags >> X86G_CC_SHIFT_Z;
678         return 1 & (inv ^ (cf | zf));
679         break;
680
681      case X86CondNS:
682      case X86CondS: /* SF == 1 */
683         sf = eflags >> X86G_CC_SHIFT_S;
684         return 1 & (inv ^ sf);
685
686      case X86CondNP:
687      case X86CondP: /* PF == 1 */
688         pf = eflags >> X86G_CC_SHIFT_P;
689         return 1 & (inv ^ pf);
690
691      case X86CondNL:
692      case X86CondL: /* (SF xor OF) == 1 */
693         sf = eflags >> X86G_CC_SHIFT_S;
694         of = eflags >> X86G_CC_SHIFT_O;
695         return 1 & (inv ^ (sf ^ of));
696         break;
697
698      case X86CondNLE:
699      case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
700         sf = eflags >> X86G_CC_SHIFT_S;
701         of = eflags >> X86G_CC_SHIFT_O;
702         zf = eflags >> X86G_CC_SHIFT_Z;
703         return 1 & (inv ^ ((sf ^ of) | zf));
704         break;
705
706      default:
707         /* shouldn't really make these calls from generated code */
708         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
709                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
710         vpanic("x86g_calculate_condition");
711   }
712}
713
714
715/* VISIBLE TO LIBVEX CLIENT */
716UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
717{
718   UInt eflags = x86g_calculate_eflags_all_WRK(
719                    vex_state->guest_CC_OP,
720                    vex_state->guest_CC_DEP1,
721                    vex_state->guest_CC_DEP2,
722                    vex_state->guest_CC_NDEP
723                 );
724   UInt dflag = vex_state->guest_DFLAG;
725   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
726   if (dflag == 0xFFFFFFFF)
727      eflags |= (1<<10);
728   if (vex_state->guest_IDFLAG == 1)
729      eflags |= (1<<21);
730   if (vex_state->guest_ACFLAG == 1)
731      eflags |= (1<<18);
732
733   return eflags;
734}
735
736/* VISIBLE TO LIBVEX CLIENT */
737void
738LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
739                              /*MOD*/VexGuestX86State* vex_state )
740{
741   UInt oszacp = x86g_calculate_eflags_all_WRK(
742                    vex_state->guest_CC_OP,
743                    vex_state->guest_CC_DEP1,
744                    vex_state->guest_CC_DEP2,
745                    vex_state->guest_CC_NDEP
746                 );
747   if (new_carry_flag & 1) {
748      oszacp |= X86G_CC_MASK_C;
749   } else {
750      oszacp &= ~X86G_CC_MASK_C;
751   }
752   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
753   vex_state->guest_CC_DEP1 = oszacp;
754   vex_state->guest_CC_DEP2 = 0;
755   vex_state->guest_CC_NDEP = 0;
756}
757
758
759/*---------------------------------------------------------------*/
760/*--- %eflags translation-time function specialisers.         ---*/
761/*--- These help iropt specialise calls the above run-time    ---*/
762/*--- %eflags functions.                                      ---*/
763/*---------------------------------------------------------------*/
764
765/* Used by the optimiser to try specialisations.  Returns an
766   equivalent expression, or NULL if none. */
767
768static inline Bool isU32 ( IRExpr* e, UInt n )
769{
770   return
771      toBool( e->tag == Iex_Const
772              && e->Iex.Const.con->tag == Ico_U32
773              && e->Iex.Const.con->Ico.U32 == n );
774}
775
776IRExpr* guest_x86_spechelper ( const HChar* function_name,
777                               IRExpr** args,
778                               IRStmt** precedingStmts,
779                               Int      n_precedingStmts )
780{
781#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
782#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
783#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
784#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
785
786   Int i, arity = 0;
787   for (i = 0; args[i]; i++)
788      arity++;
789#  if 0
790   vex_printf("spec request:\n");
791   vex_printf("   %s  ", function_name);
792   for (i = 0; i < arity; i++) {
793      vex_printf("  ");
794      ppIRExpr(args[i]);
795   }
796   vex_printf("\n");
797#  endif
798
799   /* --------- specialising "x86g_calculate_condition" --------- */
800
801   if (vex_streq(function_name, "x86g_calculate_condition")) {
802      /* specialise calls to above "calculate condition" function */
803      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
804      vassert(arity == 5);
805      cond    = args[0];
806      cc_op   = args[1];
807      cc_dep1 = args[2];
808      cc_dep2 = args[3];
809
810      /*---------------- ADDL ----------------*/
811
812      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
813         /* long add, then Z --> test (dst+src == 0) */
814         return unop(Iop_1Uto32,
815                     binop(Iop_CmpEQ32,
816                           binop(Iop_Add32, cc_dep1, cc_dep2),
817                           mkU32(0)));
818      }
819
820      /*---------------- SUBL ----------------*/
821
822      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
823         /* long sub/cmp, then Z --> test dst==src */
824         return unop(Iop_1Uto32,
825                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
826      }
827      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
828         /* long sub/cmp, then NZ --> test dst!=src */
829         return unop(Iop_1Uto32,
830                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
831      }
832
833      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
834         /* long sub/cmp, then L (signed less than)
835            --> test dst <s src */
836         return unop(Iop_1Uto32,
837                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
838      }
839      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
840         /* long sub/cmp, then NL (signed greater than or equal)
841            --> test !(dst <s src) */
842         return binop(Iop_Xor32,
843                      unop(Iop_1Uto32,
844                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
845                      mkU32(1));
846      }
847
848      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
849         /* long sub/cmp, then LE (signed less than or equal)
850            --> test dst <=s src */
851         return unop(Iop_1Uto32,
852                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
853      }
854      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
855         /* long sub/cmp, then NLE (signed not less than or equal)
856            --> test dst >s src
857            --> test !(dst <=s src) */
858         return binop(Iop_Xor32,
859                      unop(Iop_1Uto32,
860                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
861                      mkU32(1));
862      }
863
864      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
865         /* long sub/cmp, then BE (unsigned less than or equal)
866            --> test dst <=u src */
867         return unop(Iop_1Uto32,
868                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
869      }
870      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
871         /* long sub/cmp, then BE (unsigned greater than)
872            --> test !(dst <=u src) */
873         return binop(Iop_Xor32,
874                      unop(Iop_1Uto32,
875                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
876                      mkU32(1));
877      }
878
879      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
880         /* long sub/cmp, then B (unsigned less than)
881            --> test dst <u src */
882         return unop(Iop_1Uto32,
883                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
884      }
885      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
886         /* long sub/cmp, then NB (unsigned greater than or equal)
887            --> test !(dst <u src) */
888         return binop(Iop_Xor32,
889                      unop(Iop_1Uto32,
890                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
891                      mkU32(1));
892      }
893
894      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
895         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
896         return unop(Iop_1Uto32,
897                     binop(Iop_CmpLT32S,
898                           binop(Iop_Sub32, cc_dep1, cc_dep2),
899                           mkU32(0)));
900      }
901      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
902         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
903         return binop(Iop_Xor32,
904                      unop(Iop_1Uto32,
905                           binop(Iop_CmpLT32S,
906                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
907                                 mkU32(0))),
908                      mkU32(1));
909      }
910
911      /*---------------- SUBW ----------------*/
912
913      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
914         /* word sub/cmp, then Z --> test dst==src */
915         return unop(Iop_1Uto32,
916                     binop(Iop_CmpEQ16,
917                           unop(Iop_32to16,cc_dep1),
918                           unop(Iop_32to16,cc_dep2)));
919      }
920      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
921         /* word sub/cmp, then NZ --> test dst!=src */
922         return unop(Iop_1Uto32,
923                     binop(Iop_CmpNE16,
924                           unop(Iop_32to16,cc_dep1),
925                           unop(Iop_32to16,cc_dep2)));
926      }
927
928      /*---------------- SUBB ----------------*/
929
930      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
931         /* byte sub/cmp, then Z --> test dst==src */
932         return unop(Iop_1Uto32,
933                     binop(Iop_CmpEQ8,
934                           unop(Iop_32to8,cc_dep1),
935                           unop(Iop_32to8,cc_dep2)));
936      }
937      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
938         /* byte sub/cmp, then NZ --> test dst!=src */
939         return unop(Iop_1Uto32,
940                     binop(Iop_CmpNE8,
941                           unop(Iop_32to8,cc_dep1),
942                           unop(Iop_32to8,cc_dep2)));
943      }
944
945      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
946         /* byte sub/cmp, then NBE (unsigned greater than)
947            --> test src <u dst */
948         /* Note, args are opposite way round from the usual */
949         return unop(Iop_1Uto32,
950                     binop(Iop_CmpLT32U,
951                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
952			   binop(Iop_And32,cc_dep1,mkU32(0xFF))));
953      }
954
955      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
956                                        && isU32(cc_dep2, 0)) {
957         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
958                                         --> test dst <s 0
959                                         --> (UInt)dst[7]
960            This is yet another scheme by which gcc figures out if the
961            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
962         /* Note: isU32(cc_dep2, 0) is correct, even though this is
963            for an 8-bit comparison, since the args to the helper
964            function are always U32s. */
965         return binop(Iop_And32,
966                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
967                      mkU32(1));
968      }
969      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
970                                        && isU32(cc_dep2, 0)) {
971         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
972                                          --> test !(dst <s 0)
973                                          --> (UInt) !dst[7]
974         */
975         return binop(Iop_Xor32,
976                      binop(Iop_And32,
977                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
978                            mkU32(1)),
979                mkU32(1));
980      }
981
982      /*---------------- LOGICL ----------------*/
983
984      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
985         /* long and/or/xor, then Z --> test dst==0 */
986         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
987      }
988      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
989         /* long and/or/xor, then NZ --> test dst!=0 */
990         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
991      }
992
993      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
994         /* long and/or/xor, then LE
995            This is pretty subtle.  LOGIC sets SF and ZF according to the
996            result and makes OF be zero.  LE computes (SZ ^ OF) | ZF, but
997            OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
998            the result is <=signed 0.  Hence ...
999         */
1000         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
1001      }
1002
1003      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
1004         /* long and/or/xor, then BE
1005            LOGIC sets ZF according to the result and makes CF be zero.
1006            BE computes (CF | ZF), but CF is zero, so this reduces ZF
1007            -- which will be 1 iff the result is zero.  Hence ...
1008         */
1009         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1010      }
1011
1012      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
1013         /* see comment below for (LOGICB, CondS) */
1014         /* long and/or/xor, then S --> (UInt)result[31] */
1015         return binop(Iop_And32,
1016                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
1017                      mkU32(1));
1018      }
1019      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
1020         /* see comment below for (LOGICB, CondNS) */
1021         /* long and/or/xor, then S --> (UInt) ~ result[31] */
1022         return binop(Iop_Xor32,
1023                binop(Iop_And32,
1024                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
1025                      mkU32(1)),
1026                mkU32(1));
1027      }
1028
1029      /*---------------- LOGICW ----------------*/
1030
1031      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
1032         /* word and/or/xor, then Z --> test dst==0 */
1033         return unop(Iop_1Uto32,
1034                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
1035                                        mkU32(0)));
1036      }
1037
1038      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
1039         /* see comment below for (LOGICB, CondS) */
1040         /* word and/or/xor, then S --> (UInt)result[15] */
1041         return binop(Iop_And32,
1042                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
1043                      mkU32(1));
1044      }
1045
1046      /*---------------- LOGICB ----------------*/
1047
1048      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
1049         /* byte and/or/xor, then Z --> test dst==0 */
1050         return unop(Iop_1Uto32,
1051                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
1052                                        mkU32(0)));
1053      }
1054      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
1055         /* byte and/or/xor, then Z --> test dst!=0 */
1056         /* b9ac9:       84 c0                   test   %al,%al
1057            b9acb:       75 0d                   jne    b9ada */
1058         return unop(Iop_1Uto32,
1059                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
1060                                        mkU32(0)));
1061      }
1062
1063      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
1064         /* this is an idiom gcc sometimes uses to find out if the top
1065            bit of a byte register is set: eg testb %al,%al; js ..
1066            Since it just depends on the top bit of the byte, extract
1067            that bit and explicitly get rid of all the rest.  This
1068            helps memcheck avoid false positives in the case where any
1069            of the other bits in the byte are undefined. */
1070         /* byte and/or/xor, then S --> (UInt)result[7] */
1071         return binop(Iop_And32,
1072                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
1073                      mkU32(1));
1074      }
1075      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
1076         /* ditto, for negation-of-S. */
1077         /* byte and/or/xor, then S --> (UInt) ~ result[7] */
1078         return binop(Iop_Xor32,
1079                binop(Iop_And32,
1080                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
1081                      mkU32(1)),
1082                mkU32(1));
1083      }
1084
1085      /*---------------- DECL ----------------*/
1086
1087      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
1088         /* dec L, then Z --> test dst == 0 */
1089         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1090      }
1091
1092      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
1093         /* dec L, then S --> compare DST <s 0 */
1094         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
1095      }
1096
1097      /*---------------- DECW ----------------*/
1098
1099      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
1100         /* dec W, then Z --> test dst == 0 */
1101         return unop(Iop_1Uto32,
1102                     binop(Iop_CmpEQ32,
1103                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
1104                           mkU32(0)));
1105      }
1106
1107      /*---------------- INCW ----------------*/
1108
1109      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
1110         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
1111         /* inc W, then Z --> test dst == 0 */
1112         return unop(Iop_1Uto32,
1113                     binop(Iop_CmpEQ32,
1114                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
1115                           mkU32(0)));
1116      }
1117
1118      /*---------------- SHRL ----------------*/
1119
1120      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
1121         /* SHRL, then Z --> test dep1 == 0 */
1122         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1123      }
1124
1125      /*---------------- COPY ----------------*/
1126      /* This can happen, as a result of x87 FP compares: "fcom ... ;
1127         fnstsw %ax ; sahf ; jbe" for example. */
1128
1129      if (isU32(cc_op, X86G_CC_OP_COPY) &&
1130          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
1131         /* COPY, then BE --> extract C and Z from dep1, and test
1132            (C or Z) == 1. */
1133         /* COPY, then NBE --> extract C and Z from dep1, and test
1134            (C or Z) == 0. */
1135         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
1136         return
1137            unop(
1138               Iop_1Uto32,
1139               binop(
1140                  Iop_CmpEQ32,
1141                  binop(
1142                     Iop_And32,
1143                     binop(
1144                        Iop_Or32,
1145                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1146                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
1147                     ),
1148                     mkU32(1)
1149                  ),
1150                  mkU32(nnn)
1151               )
1152            );
1153      }
1154
1155      if (isU32(cc_op, X86G_CC_OP_COPY)
1156          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
1157         /* COPY, then B --> extract C from dep1, and test (C == 1). */
1158         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
1159         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
1160         return
1161            unop(
1162               Iop_1Uto32,
1163               binop(
1164                  Iop_CmpEQ32,
1165                  binop(
1166                     Iop_And32,
1167                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1168                     mkU32(1)
1169                  ),
1170                  mkU32(nnn)
1171               )
1172            );
1173      }
1174
1175      if (isU32(cc_op, X86G_CC_OP_COPY)
1176          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
1177         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1178         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1179         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
1180         return
1181            unop(
1182               Iop_1Uto32,
1183               binop(
1184                  Iop_CmpEQ32,
1185                  binop(
1186                     Iop_And32,
1187                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
1188                     mkU32(1)
1189                  ),
1190                  mkU32(nnn)
1191               )
1192            );
1193      }
1194
1195      if (isU32(cc_op, X86G_CC_OP_COPY)
1196          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
1197         /* COPY, then P --> extract P from dep1, and test (P == 1). */
1198         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
1199         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
1200         return
1201            unop(
1202               Iop_1Uto32,
1203               binop(
1204                  Iop_CmpEQ32,
1205                  binop(
1206                     Iop_And32,
1207                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
1208                     mkU32(1)
1209                  ),
1210                  mkU32(nnn)
1211               )
1212            );
1213      }
1214
1215      return NULL;
1216   }
1217
1218   /* --------- specialising "x86g_calculate_eflags_c" --------- */
1219
1220   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
1221      /* specialise calls to above "calculate_eflags_c" function */
1222      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1223      vassert(arity == 4);
1224      cc_op   = args[0];
1225      cc_dep1 = args[1];
1226      cc_dep2 = args[2];
1227      cc_ndep = args[3];
1228
1229      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
1230         /* C after sub denotes unsigned less than */
1231         return unop(Iop_1Uto32,
1232                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
1233      }
1234      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
1235         /* C after sub denotes unsigned less than */
1236         return unop(Iop_1Uto32,
1237                     binop(Iop_CmpLT32U,
1238                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
1239                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
1240      }
1241      if (isU32(cc_op, X86G_CC_OP_LOGICL)
1242          || isU32(cc_op, X86G_CC_OP_LOGICW)
1243          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
1244         /* cflag after logic is zero */
1245         return mkU32(0);
1246      }
1247      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
1248         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1249         return cc_ndep;
1250      }
1251      if (isU32(cc_op, X86G_CC_OP_COPY)) {
1252         /* cflag after COPY is stored in DEP1. */
1253         return
1254            binop(
1255               Iop_And32,
1256               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1257               mkU32(1)
1258            );
1259      }
1260      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
1261         /* C after add denotes sum <u either arg */
1262         return unop(Iop_1Uto32,
1263                     binop(Iop_CmpLT32U,
1264                           binop(Iop_Add32, cc_dep1, cc_dep2),
1265                           cc_dep1));
1266      }
1267      // ATC, requires verification, no test case known
1268      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
1269      //   /* C after signed widening multiply denotes the case where
1270      //      the top half of the result isn't simply the sign extension
1271      //      of the bottom half (iow the result doesn't fit completely
1272      //      in the bottom half).  Hence:
1273      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
1274      //      where 'x' denotes signed widening multiply.*/
1275      //   return
1276      //      unop(Iop_1Uto32,
1277      //           binop(Iop_CmpNE32,
1278      //                 unop(Iop_64HIto32,
1279      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
1280      //                 binop(Iop_Sar32,
1281      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
1282      //}
1283#     if 0
1284      if (cc_op->tag == Iex_Const) {
1285         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1286      }
1287#     endif
1288
1289      return NULL;
1290   }
1291
1292   /* --------- specialising "x86g_calculate_eflags_all" --------- */
1293
1294   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
1295      /* specialise calls to above "calculate_eflags_all" function */
1296      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
1297      vassert(arity == 4);
1298      cc_op   = args[0];
1299      cc_dep1 = args[1];
1300      /* cc_dep2 = args[2]; */
1301      /* cc_ndep = args[3]; */
1302
1303      if (isU32(cc_op, X86G_CC_OP_COPY)) {
1304         /* eflags after COPY are stored in DEP1. */
1305         return
1306            binop(
1307               Iop_And32,
1308               cc_dep1,
1309               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
1310                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
1311            );
1312      }
1313      return NULL;
1314   }
1315
1316#  undef unop
1317#  undef binop
1318#  undef mkU32
1319#  undef mkU8
1320
1321   return NULL;
1322}
1323
1324
1325/*---------------------------------------------------------------*/
1326/*--- Supporting functions for x87 FPU activities.            ---*/
1327/*---------------------------------------------------------------*/
1328
1329static inline Bool host_is_little_endian ( void )
1330{
1331   UInt x = 0x76543210;
1332   UChar* p = (UChar*)(&x);
1333   return toBool(*p == 0x10);
1334}
1335
1336/* 80 and 64-bit floating point formats:
1337
1338   80-bit:
1339
1340    S  0       0-------0      zero
1341    S  0       0X------X      denormals
1342    S  1-7FFE  1X------X      normals (all normals have leading 1)
1343    S  7FFF    10------0      infinity
1344    S  7FFF    10X-----X      snan
1345    S  7FFF    11X-----X      qnan
1346
1347   S is the sign bit.  For runs X----X, at least one of the Xs must be
1348   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
1349   there is an explicitly represented leading 1, and a sign bit,
1350   giving 80 in total.
1351
1352   64-bit avoids the confusion of an explicitly represented leading 1
1353   and so is simpler:
1354
1355    S  0      0------0   zero
1356    S  0      X------X   denormals
1357    S  1-7FE  any        normals
1358    S  7FF    0------0   infinity
1359    S  7FF    0X-----X   snan
1360    S  7FF    1X-----X   qnan
1361
1362   Exponent is 11 bits, fractional part is 52 bits, and there is a
1363   sign bit, giving 64 in total.
1364*/
1365
1366/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1367/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1368UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
1369{
1370   Bool   mantissaIsZero;
1371   Int    bexp;
1372   UChar  sign;
1373   UChar* f64;
1374
1375   vassert(host_is_little_endian());
1376
1377   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1378
1379   f64  = (UChar*)(&dbl);
1380   sign = toUChar( (f64[7] >> 7) & 1 );
1381
1382   /* First off, if the tag indicates the register was empty,
1383      return 1,0,sign,1 */
1384   if (tag == 0) {
1385      /* vex_printf("Empty\n"); */
1386      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
1387                                 | X86G_FC_MASK_C0;
1388   }
1389
1390   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1391   bexp &= 0x7FF;
1392
1393   mantissaIsZero
1394      = toBool(
1395           (f64[6] & 0x0F) == 0
1396           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1397        );
1398
1399   /* If both exponent and mantissa are zero, the value is zero.
1400      Return 1,0,sign,0. */
1401   if (bexp == 0 && mantissaIsZero) {
1402      /* vex_printf("Zero\n"); */
1403      return X86G_FC_MASK_C3 | 0
1404                             | (sign << X86G_FC_SHIFT_C1) | 0;
1405   }
1406
1407   /* If exponent is zero but mantissa isn't, it's a denormal.
1408      Return 1,1,sign,0. */
1409   if (bexp == 0 && !mantissaIsZero) {
1410      /* vex_printf("Denormal\n"); */
1411      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
1412                             | (sign << X86G_FC_SHIFT_C1) | 0;
1413   }
1414
1415   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1416      Return 0,1,sign,1. */
1417   if (bexp == 0x7FF && mantissaIsZero) {
1418      /* vex_printf("Inf\n"); */
1419      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
1420                                 | X86G_FC_MASK_C0;
1421   }
1422
1423   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1424      Return 0,0,sign,1. */
1425   if (bexp == 0x7FF && !mantissaIsZero) {
1426      /* vex_printf("NaN\n"); */
1427      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
1428   }
1429
1430   /* Uh, ok, we give up.  It must be a normal finite number.
1431      Return 0,1,sign,0.
1432   */
1433   /* vex_printf("normal\n"); */
1434   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
1435}
1436
1437
1438/* CALLED FROM GENERATED CODE */
1439/* DIRTY HELPER (reads guest memory) */
1440ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
1441{
1442   ULong f64;
1443   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
1444   return f64;
1445}
1446
1447/* CALLED FROM GENERATED CODE */
1448/* DIRTY HELPER (writes guest memory) */
1449void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
1450{
1451   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
1452}
1453
1454
1455/*----------------------------------------------*/
1456/*--- The exported fns ..                    ---*/
1457/*----------------------------------------------*/
1458
1459/* Layout of the real x87 state. */
1460/* 13 June 05: Fpu_State and auxiliary constants was moved to
1461   g_generic_x87.h */
1462
1463
1464/* CLEAN HELPER */
1465/* fpucw[15:0] contains a x87 native format FPU control word.
1466   Extract from it the required FPROUND value and any resulting
1467   emulation warning, and return (warn << 32) | fpround value.
1468*/
1469ULong x86g_check_fldcw ( UInt fpucw )
1470{
1471   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1472   /* NOTE, encoded exactly as per enum IRRoundingMode. */
1473   UInt rmode = (fpucw >> 10) & 3;
1474
1475   /* Detect any required emulation warnings. */
1476   VexEmNote ew = EmNote_NONE;
1477
1478   if ((fpucw & 0x3F) != 0x3F) {
1479      /* unmasked exceptions! */
1480      ew = EmWarn_X86_x87exns;
1481   }
1482   else
1483   if (((fpucw >> 8) & 3) != 3) {
1484      /* unsupported precision */
1485      ew = EmWarn_X86_x87precision;
1486   }
1487
1488   return (((ULong)ew) << 32) | ((ULong)rmode);
1489}
1490
1491/* CLEAN HELPER */
1492/* Given fpround as an IRRoundingMode value, create a suitable x87
1493   native format FPU control word. */
1494UInt x86g_create_fpucw ( UInt fpround )
1495{
1496   fpround &= 3;
1497   return 0x037F | (fpround << 10);
1498}
1499
1500
1501/* CLEAN HELPER */
1502/* mxcsr[15:0] contains a SSE native format MXCSR value.
1503   Extract from it the required SSEROUND value and any resulting
1504   emulation warning, and return (warn << 32) | sseround value.
1505*/
1506ULong x86g_check_ldmxcsr ( UInt mxcsr )
1507{
1508   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1509   /* NOTE, encoded exactly as per enum IRRoundingMode. */
1510   UInt rmode = (mxcsr >> 13) & 3;
1511
1512   /* Detect any required emulation warnings. */
1513   VexEmNote ew = EmNote_NONE;
1514
1515   if ((mxcsr & 0x1F80) != 0x1F80) {
1516      /* unmasked exceptions! */
1517      ew = EmWarn_X86_sseExns;
1518   }
1519   else
1520   if (mxcsr & (1<<15)) {
1521      /* FZ is set */
1522      ew = EmWarn_X86_fz;
1523   }
1524   else
1525   if (mxcsr & (1<<6)) {
1526      /* DAZ is set */
1527      ew = EmWarn_X86_daz;
1528   }
1529
1530   return (((ULong)ew) << 32) | ((ULong)rmode);
1531}
1532
1533
1534/* CLEAN HELPER */
1535/* Given sseround as an IRRoundingMode value, create a suitable SSE
1536   native format MXCSR value. */
1537UInt x86g_create_mxcsr ( UInt sseround )
1538{
1539   sseround &= 3;
1540   return 0x1F80 | (sseround << 13);
1541}
1542
1543
1544/* CALLED FROM GENERATED CODE */
1545/* DIRTY HELPER (writes guest state) */
1546/* Initialise the x87 FPU state as per 'finit'. */
1547void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1548{
1549   Int i;
1550   gst->guest_FTOP = 0;
1551   for (i = 0; i < 8; i++) {
1552      gst->guest_FPTAG[i] = 0; /* empty */
1553      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1554   }
1555   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1556   gst->guest_FC3210  = 0;
1557}
1558
1559
1560/* This is used to implement both 'frstor' and 'fldenv'.  The latter
1561   appears to differ from the former only in that the 8 FP registers
1562   themselves are not transferred into the guest state. */
1563static
1564VexEmNote do_put_x87 ( Bool moveRegs,
1565                       /*IN*/UChar* x87_state,
1566                       /*OUT*/VexGuestX86State* vex_state )
1567{
1568   Int        stno, preg;
1569   UInt       tag;
1570   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1571   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1572   Fpu_State* x87     = (Fpu_State*)x87_state;
1573   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1574   UInt       tagw    = x87->env[FP_ENV_TAG];
1575   UInt       fpucw   = x87->env[FP_ENV_CTRL];
1576   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1577   VexEmNote  ew;
1578   UInt       fpround;
1579   ULong      pair;
1580
1581   /* Copy registers and tags */
1582   for (stno = 0; stno < 8; stno++) {
1583      preg = (stno + ftop) & 7;
1584      tag = (tagw >> (2*preg)) & 3;
1585      if (tag == 3) {
1586         /* register is empty */
1587         /* hmm, if it's empty, does it still get written?  Probably
1588            safer to say it does.  If we don't, memcheck could get out
1589            of sync, in that it thinks all FP registers are defined by
1590            this helper, but in reality some have not been updated. */
1591         if (moveRegs)
1592            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1593         vexTags[preg] = 0;
1594      } else {
1595         /* register is non-empty */
1596         if (moveRegs)
1597            convert_f80le_to_f64le( &x87->reg[10*stno],
1598                                    (UChar*)&vexRegs[preg] );
1599         vexTags[preg] = 1;
1600      }
1601   }
1602
1603   /* stack pointer */
1604   vex_state->guest_FTOP = ftop;
1605
1606   /* status word */
1607   vex_state->guest_FC3210 = c3210;
1608
1609   /* handle the control word, setting FPROUND and detecting any
1610      emulation warnings. */
1611   pair    = x86g_check_fldcw ( (UInt)fpucw );
1612   fpround = (UInt)pair;
1613   ew      = (VexEmNote)(pair >> 32);
1614
1615   vex_state->guest_FPROUND = fpround & 3;
1616
1617   /* emulation warnings --> caller */
1618   return ew;
1619}
1620
1621
1622/* Create an x87 FPU state from the guest state, as close as
1623   we can approximate it. */
1624static
1625void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1626                  /*OUT*/UChar* x87_state )
1627{
1628   Int        i, stno, preg;
1629   UInt       tagw;
1630   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1631   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1632   Fpu_State* x87     = (Fpu_State*)x87_state;
1633   UInt       ftop    = vex_state->guest_FTOP;
1634   UInt       c3210   = vex_state->guest_FC3210;
1635
1636   for (i = 0; i < 14; i++)
1637      x87->env[i] = 0;
1638
1639   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1640   x87->env[FP_ENV_STAT]
1641      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1642   x87->env[FP_ENV_CTRL]
1643      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1644
1645   /* Dump the register stack in ST order. */
1646   tagw = 0;
1647   for (stno = 0; stno < 8; stno++) {
1648      preg = (stno + ftop) & 7;
1649      if (vexTags[preg] == 0) {
1650         /* register is empty */
1651         tagw |= (3 << (2*preg));
1652         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1653                                 &x87->reg[10*stno] );
1654      } else {
1655         /* register is full. */
1656         tagw |= (0 << (2*preg));
1657         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1658                                 &x87->reg[10*stno] );
1659      }
1660   }
1661   x87->env[FP_ENV_TAG] = toUShort(tagw);
1662}
1663
1664
1665/* CALLED FROM GENERATED CODE */
1666/* DIRTY HELPER (reads guest state, writes guest mem) */
1667void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
1668{
1669   /* Somewhat roundabout, but at least it's simple. */
1670   Fpu_State tmp;
1671   UShort*   addrS = (UShort*)addr;
1672   UChar*    addrC = (UChar*)addr;
1673   U128*     xmm   = (U128*)(addr + 160);
1674   UInt      mxcsr;
1675   UShort    fp_tags;
1676   UInt      summary_tags;
1677   Int       r, stno;
1678   UShort    *srcS, *dstS;
1679
1680   do_get_x87( gst, (UChar*)&tmp );
1681   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
1682
1683   /* Now build the proper fxsave image from the x87 image we just
1684      made. */
1685
1686   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1687   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */
1688
1689   /* set addrS[2] in an endian-independent way */
1690   summary_tags = 0;
1691   fp_tags = tmp.env[FP_ENV_TAG];
1692   for (r = 0; r < 8; r++) {
1693      if ( ((fp_tags >> (2*r)) & 3) != 3 )
1694         summary_tags |= (1 << r);
1695   }
1696   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1697   addrC[5]  = 0; /* pad */
1698
1699   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
1700   addrS[4]  = 0;
1701   addrS[5]  = 0; /* FPU IP (bogus) */
1702   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
1703                     could conceivably dump %CS here) */
1704
1705   addrS[7]  = 0; /* Intel reserved */
1706
1707   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
1708   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
1709   addrS[10] = 0; /* segment selector for above operand pointer; %DS
1710                     perhaps? */
1711   addrS[11] = 0; /* Intel reserved */
1712
1713   addrS[12] = toUShort(mxcsr);  /* MXCSR */
1714   addrS[13] = toUShort(mxcsr >> 16);
1715
1716   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
1717   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
1718
1719   /* Copy in the FP registers, in ST order. */
1720   for (stno = 0; stno < 8; stno++) {
1721      srcS = (UShort*)(&tmp.reg[10*stno]);
1722      dstS = (UShort*)(&addrS[16 + 8*stno]);
1723      dstS[0] = srcS[0];
1724      dstS[1] = srcS[1];
1725      dstS[2] = srcS[2];
1726      dstS[3] = srcS[3];
1727      dstS[4] = srcS[4];
1728      dstS[5] = 0;
1729      dstS[6] = 0;
1730      dstS[7] = 0;
1731   }
1732
1733   /* That's the first 160 bytes of the image done.  Now only %xmm0
1734      .. %xmm7 remain to be copied.  If the host is big-endian, these
1735      need to be byte-swapped. */
1736   vassert(host_is_little_endian());
1737
1738#  define COPY_U128(_dst,_src)                       \
1739      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1740           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1741      while (0)
1742
1743   COPY_U128( xmm[0], gst->guest_XMM0 );
1744   COPY_U128( xmm[1], gst->guest_XMM1 );
1745   COPY_U128( xmm[2], gst->guest_XMM2 );
1746   COPY_U128( xmm[3], gst->guest_XMM3 );
1747   COPY_U128( xmm[4], gst->guest_XMM4 );
1748   COPY_U128( xmm[5], gst->guest_XMM5 );
1749   COPY_U128( xmm[6], gst->guest_XMM6 );
1750   COPY_U128( xmm[7], gst->guest_XMM7 );
1751
1752#  undef COPY_U128
1753}
1754
1755
1756/* CALLED FROM GENERATED CODE */
1757/* DIRTY HELPER (writes guest state, reads guest mem) */
1758VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
1759{
1760   Fpu_State tmp;
1761   VexEmNote warnX87 = EmNote_NONE;
1762   VexEmNote warnXMM = EmNote_NONE;
1763   UShort*   addrS   = (UShort*)addr;
1764   UChar*    addrC   = (UChar*)addr;
1765   U128*     xmm     = (U128*)(addr + 160);
1766   UShort    fp_tags;
1767   Int       r, stno, i;
1768
1769   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
1770      to be byte-swapped. */
1771   vassert(host_is_little_endian());
1772
1773#  define COPY_U128(_dst,_src)                       \
1774      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1775           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1776      while (0)
1777
1778   COPY_U128( gst->guest_XMM0, xmm[0] );
1779   COPY_U128( gst->guest_XMM1, xmm[1] );
1780   COPY_U128( gst->guest_XMM2, xmm[2] );
1781   COPY_U128( gst->guest_XMM3, xmm[3] );
1782   COPY_U128( gst->guest_XMM4, xmm[4] );
1783   COPY_U128( gst->guest_XMM5, xmm[5] );
1784   COPY_U128( gst->guest_XMM6, xmm[6] );
1785   COPY_U128( gst->guest_XMM7, xmm[7] );
1786
1787#  undef COPY_U128
1788
1789   /* Copy the x87 registers out of the image, into a temporary
1790      Fpu_State struct. */
1791
1792   /* LLVM on Darwin turns the following loop into a movaps plus a
1793      handful of scalar stores.  This would work fine except for the
1794      fact that VEX doesn't keep the stack correctly (16-) aligned for
1795      the call, so it segfaults.  Hence, split the loop into two
1796      pieces (and pray LLVM doesn't merely glue them back together) so
1797      it's composed only of scalar stores and so is alignment
1798      insensitive.  Of course this is a kludge of the lamest kind --
1799      VEX should be fixed properly. */
1800   /* Code that seems to trigger the problem:
1801      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
1802   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
1803   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;
1804
1805   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1806   /* fill in tmp.reg[0..7] */
1807   for (stno = 0; stno < 8; stno++) {
1808      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1809      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1810      dstS[0] = srcS[0];
1811      dstS[1] = srcS[1];
1812      dstS[2] = srcS[2];
1813      dstS[3] = srcS[3];
1814      dstS[4] = srcS[4];
1815   }
1816   /* fill in tmp.env[0..13] */
1817   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1818   tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */
1819
1820   fp_tags = 0;
1821   for (r = 0; r < 8; r++) {
1822      if (addrC[4] & (1<<r))
1823         fp_tags |= (0 << (2*r)); /* EMPTY */
1824      else
1825         fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */
1826   }
1827   tmp.env[FP_ENV_TAG] = fp_tags;
1828
1829   /* Now write 'tmp' into the guest state. */
1830   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
1831
1832   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1833                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1834     ULong w64 = x86g_check_ldmxcsr( w32 );
1835
1836     warnXMM = (VexEmNote)(w64 >> 32);
1837
1838     gst->guest_SSEROUND = (UInt)w64;
1839   }
1840
1841   /* Prefer an X87 emwarn over an XMM one, if both exist. */
1842   if (warnX87 != EmNote_NONE)
1843      return warnX87;
1844   else
1845      return warnXMM;
1846}
1847
1848
1849/* CALLED FROM GENERATED CODE */
1850/* DIRTY HELPER (reads guest state, writes guest mem) */
1851void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1852{
1853   do_get_x87( gst, (UChar*)addr );
1854}
1855
1856/* CALLED FROM GENERATED CODE */
1857/* DIRTY HELPER (writes guest state, reads guest mem) */
1858VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1859{
1860   return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
1861}
1862
1863/* CALLED FROM GENERATED CODE */
1864/* DIRTY HELPER (reads guest state, writes guest mem) */
1865void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1866{
1867   /* Somewhat roundabout, but at least it's simple. */
1868   Int       i;
1869   UShort*   addrP = (UShort*)addr;
1870   Fpu_State tmp;
1871   do_get_x87( gst, (UChar*)&tmp );
1872   for (i = 0; i < 14; i++)
1873      addrP[i] = tmp.env[i];
1874}
1875
1876/* CALLED FROM GENERATED CODE */
1877/* DIRTY HELPER (writes guest state, reads guest mem) */
1878VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1879{
1880   return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
1881}
1882
1883
1884/*---------------------------------------------------------------*/
1885/*--- Misc integer helpers, including rotates and CPUID.      ---*/
1886/*---------------------------------------------------------------*/
1887
1888/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1889/* Calculate both flags and value result for rotate right
1890   through the carry bit.  Result in low 32 bits,
1891   new flags (OSZACP) in high 32 bits.
1892*/
1893ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1894{
1895   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1896
1897   switch (sz) {
1898      case 4:
1899         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1900         of        = ((arg >> 31) ^ cf) & 1;
1901         while (tempCOUNT > 0) {
1902            tempcf = arg & 1;
1903            arg    = (arg >> 1) | (cf << 31);
1904            cf     = tempcf;
1905            tempCOUNT--;
1906         }
1907         break;
1908      case 2:
1909         while (tempCOUNT >= 17) tempCOUNT -= 17;
1910         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1911         of        = ((arg >> 15) ^ cf) & 1;
1912         while (tempCOUNT > 0) {
1913            tempcf = arg & 1;
1914            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
1915            cf     = tempcf;
1916            tempCOUNT--;
1917         }
1918         break;
1919      case 1:
1920         while (tempCOUNT >= 9) tempCOUNT -= 9;
1921         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1922         of        = ((arg >> 7) ^ cf) & 1;
1923         while (tempCOUNT > 0) {
1924            tempcf = arg & 1;
1925            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
1926            cf     = tempcf;
1927            tempCOUNT--;
1928         }
1929         break;
1930      default:
1931         vpanic("calculate_RCR: invalid size");
1932   }
1933
1934   cf &= 1;
1935   of &= 1;
1936   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
1937   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
1938
1939   return (((ULong)eflags_in) << 32) | ((ULong)arg);
1940}
1941
1942
1943/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1944/* Calculate both flags and value result for rotate left
1945   through the carry bit.  Result in low 32 bits,
1946   new flags (OSZACP) in high 32 bits.
1947*/
1948ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1949{
1950   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1951
1952   switch (sz) {
1953      case 4:
1954         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1955         while (tempCOUNT > 0) {
1956            tempcf = (arg >> 31) & 1;
1957            arg    = (arg << 1) | (cf & 1);
1958            cf     = tempcf;
1959            tempCOUNT--;
1960         }
1961         of = ((arg >> 31) ^ cf) & 1;
1962         break;
1963      case 2:
1964         while (tempCOUNT >= 17) tempCOUNT -= 17;
1965         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1966         while (tempCOUNT > 0) {
1967            tempcf = (arg >> 15) & 1;
1968            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
1969            cf     = tempcf;
1970            tempCOUNT--;
1971         }
1972         of = ((arg >> 15) ^ cf) & 1;
1973         break;
1974      case 1:
1975         while (tempCOUNT >= 9) tempCOUNT -= 9;
1976         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1977         while (tempCOUNT > 0) {
1978            tempcf = (arg >> 7) & 1;
1979            arg    = 0xFF & ((arg << 1) | (cf & 1));
1980            cf     = tempcf;
1981            tempCOUNT--;
1982         }
1983         of = ((arg >> 7) ^ cf) & 1;
1984         break;
1985      default:
1986         vpanic("calculate_RCL: invalid size");
1987   }
1988
1989   cf &= 1;
1990   of &= 1;
1991   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
1992   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
1993
1994   return (((ULong)eflags_in) << 32) | ((ULong)arg);
1995}
1996
1997
1998/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1999/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
2000   AX value in low half of arg, OSZACP in upper half.
2001   See guest-x86/toIR.c usage point for details.
2002*/
2003static UInt calc_parity_8bit ( UInt w32 ) {
2004   UInt i;
2005   UInt p = 1;
2006   for (i = 0; i < 8; i++)
2007      p ^= (1 & (w32 >> i));
2008   return p;
2009}
2010UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
2011{
2012   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2013   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2014   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2015   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2016   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2017   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2018   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2019   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2020   UInt result = 0;
2021
2022   switch (opcode) {
2023      case 0x27: { /* DAA */
2024         UInt old_AL = r_AL;
2025         UInt old_C  = r_C;
2026         r_C = 0;
2027         if ((r_AL & 0xF) > 9 || r_A == 1) {
2028            r_AL = r_AL + 6;
2029            r_C  = old_C;
2030            if (r_AL >= 0x100) r_C = 1;
2031            r_A = 1;
2032         } else {
2033            r_A = 0;
2034         }
2035         if (old_AL > 0x99 || old_C == 1) {
2036            r_AL = r_AL + 0x60;
2037            r_C  = 1;
2038         } else {
2039            r_C = 0;
2040         }
2041         /* O is undefined.  S Z and P are set according to the
2042	    result. */
2043         r_AL &= 0xFF;
2044         r_O = 0; /* let's say */
2045         r_S = (r_AL & 0x80) ? 1 : 0;
2046         r_Z = (r_AL == 0) ? 1 : 0;
2047         r_P = calc_parity_8bit( r_AL );
2048         break;
2049      }
2050      case 0x2F: { /* DAS */
2051         UInt old_AL = r_AL;
2052         UInt old_C  = r_C;
2053         r_C = 0;
2054         if ((r_AL & 0xF) > 9 || r_A == 1) {
2055            Bool borrow = r_AL < 6;
2056            r_AL = r_AL - 6;
2057            r_C  = old_C;
2058            if (borrow) r_C = 1;
2059            r_A = 1;
2060         } else {
2061            r_A = 0;
2062         }
2063         if (old_AL > 0x99 || old_C == 1) {
2064            r_AL = r_AL - 0x60;
2065            r_C  = 1;
2066         } else {
2067            /* Intel docs are wrong: r_C = 0; */
2068         }
2069         /* O is undefined.  S Z and P are set according to the
2070	    result. */
2071         r_AL &= 0xFF;
2072         r_O = 0; /* let's say */
2073         r_S = (r_AL & 0x80) ? 1 : 0;
2074         r_Z = (r_AL == 0) ? 1 : 0;
2075         r_P = calc_parity_8bit( r_AL );
2076         break;
2077      }
2078      case 0x37: { /* AAA */
2079         Bool nudge = r_AL > 0xF9;
2080         if ((r_AL & 0xF) > 9 || r_A == 1) {
2081            r_AL = r_AL + 6;
2082            r_AH = r_AH + 1 + (nudge ? 1 : 0);
2083            r_A  = 1;
2084            r_C  = 1;
2085            r_AL = r_AL & 0xF;
2086         } else {
2087            r_A  = 0;
2088            r_C  = 0;
2089            r_AL = r_AL & 0xF;
2090         }
2091         /* O S Z and P are undefined. */
2092         r_O = r_S = r_Z = r_P = 0; /* let's say */
2093         break;
2094      }
2095      case 0x3F: { /* AAS */
2096         Bool nudge = r_AL < 0x06;
2097         if ((r_AL & 0xF) > 9 || r_A == 1) {
2098            r_AL = r_AL - 6;
2099            r_AH = r_AH - 1 - (nudge ? 1 : 0);
2100            r_A  = 1;
2101            r_C  = 1;
2102            r_AL = r_AL & 0xF;
2103         } else {
2104            r_A  = 0;
2105            r_C  = 0;
2106            r_AL = r_AL & 0xF;
2107         }
2108         /* O S Z and P are undefined. */
2109         r_O = r_S = r_Z = r_P = 0; /* let's say */
2110         break;
2111      }
2112      default:
2113         vassert(0);
2114   }
2115   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2116            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2117            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2118            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2119            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2120            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2121            | ( (r_AH & 0xFF) << 8 )
2122            | ( (r_AL & 0xFF) << 0 );
2123   return result;
2124}
2125
2126UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
2127{
2128   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2129   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2130   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2131   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2132   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2133   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2134   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2135   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2136   UInt result = 0;
2137
2138   switch (opcode) {
2139      case 0xD4: { /* AAM */
2140         r_AH = r_AL / 10;
2141         r_AL = r_AL % 10;
2142         break;
2143      }
2144      case 0xD5: { /* AAD */
2145         r_AL = ((r_AH * 10) + r_AL) & 0xff;
2146         r_AH = 0;
2147         break;
2148      }
2149      default:
2150         vassert(0);
2151   }
2152
2153   r_O = 0; /* let's say (undefined) */
2154   r_C = 0; /* let's say (undefined) */
2155   r_A = 0; /* let's say (undefined) */
2156   r_S = (r_AL & 0x80) ? 1 : 0;
2157   r_Z = (r_AL == 0) ? 1 : 0;
2158   r_P = calc_parity_8bit( r_AL );
2159
2160   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2161            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2162            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2163            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2164            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2165            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2166            | ( (r_AH & 0xFF) << 8 )
2167            | ( (r_AL & 0xFF) << 0 );
2168   return result;
2169}
2170
2171
2172/* CALLED FROM GENERATED CODE */
2173/* DIRTY HELPER (non-referentially-transparent) */
2174/* Horrible hack.  On non-x86 platforms, return 1. */
2175ULong x86g_dirtyhelper_RDTSC ( void )
2176{
2177#  if defined(__i386__)
2178   ULong res;
2179   __asm__ __volatile__("rdtsc" : "=A" (res));
2180   return res;
2181#  else
2182   return 1ULL;
2183#  endif
2184}
2185
2186
2187/* CALLED FROM GENERATED CODE */
2188/* DIRTY HELPER (modifies guest state) */
2189/* Claim to be a P55C (Intel Pentium/MMX) */
2190void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2191{
2192   switch (st->guest_EAX) {
2193      case 0:
2194         st->guest_EAX = 0x1;
2195         st->guest_EBX = 0x756e6547;
2196         st->guest_ECX = 0x6c65746e;
2197         st->guest_EDX = 0x49656e69;
2198         break;
2199      default:
2200         st->guest_EAX = 0x543;
2201         st->guest_EBX = 0x0;
2202         st->guest_ECX = 0x0;
2203         st->guest_EDX = 0x8001bf;
2204         break;
2205   }
2206}
2207
2208/* CALLED FROM GENERATED CODE */
2209/* DIRTY HELPER (modifies guest state) */
2210/* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
2211/* But without 3DNow support (weird, but we really don't support it). */
2212void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
2213{
2214   switch (st->guest_EAX) {
2215      /* vendor ID */
2216      case 0:
2217         st->guest_EAX = 0x1;
2218         st->guest_EBX = 0x68747541;
2219         st->guest_ECX = 0x444d4163;
2220         st->guest_EDX = 0x69746e65;
2221         break;
2222      /* feature bits */
2223      case 1:
2224         st->guest_EAX = 0x621;
2225         st->guest_EBX = 0x0;
2226         st->guest_ECX = 0x0;
2227         st->guest_EDX = 0x183f9ff;
2228         break;
2229      /* Highest Extended Function Supported (0x80000004 brand string) */
2230      case 0x80000000:
2231         st->guest_EAX = 0x80000004;
2232         st->guest_EBX = 0x68747541;
2233         st->guest_ECX = 0x444d4163;
2234         st->guest_EDX = 0x69746e65;
2235         break;
2236      /* Extended Processor Info and Feature Bits */
2237      case 0x80000001:
2238         st->guest_EAX = 0x721;
2239         st->guest_EBX = 0x0;
2240         st->guest_ECX = 0x0;
2241         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
2242         break;
2243      /* Processor Brand String "AMD Athlon(tm) Processor" */
2244      case 0x80000002:
2245         st->guest_EAX = 0x20444d41;
2246         st->guest_EBX = 0x6c687441;
2247         st->guest_ECX = 0x74286e6f;
2248         st->guest_EDX = 0x5020296d;
2249         break;
2250      case 0x80000003:
2251         st->guest_EAX = 0x65636f72;
2252         st->guest_EBX = 0x726f7373;
2253         st->guest_ECX = 0x0;
2254         st->guest_EDX = 0x0;
2255         break;
2256      default:
2257         st->guest_EAX = 0x0;
2258         st->guest_EBX = 0x0;
2259         st->guest_ECX = 0x0;
2260         st->guest_EDX = 0x0;
2261         break;
2262   }
2263}
2264
2265/* CALLED FROM GENERATED CODE */
2266/* DIRTY HELPER (modifies guest state) */
2267/* Claim to be the following SSE1-capable CPU:
2268   vendor_id       : GenuineIntel
2269   cpu family      : 6
2270   model           : 11
2271   model name      : Intel(R) Pentium(R) III CPU family      1133MHz
2272   stepping        : 1
2273   cpu MHz         : 1131.013
2274   cache size      : 512 KB
2275*/
2276void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2277{
2278   switch (st->guest_EAX) {
2279      case 0:
2280         st->guest_EAX = 0x00000002;
2281         st->guest_EBX = 0x756e6547;
2282         st->guest_ECX = 0x6c65746e;
2283         st->guest_EDX = 0x49656e69;
2284         break;
2285      case 1:
2286         st->guest_EAX = 0x000006b1;
2287         st->guest_EBX = 0x00000004;
2288         st->guest_ECX = 0x00000000;
2289         st->guest_EDX = 0x0383fbff;
2290         break;
2291      default:
2292         st->guest_EAX = 0x03020101;
2293         st->guest_EBX = 0x00000000;
2294         st->guest_ECX = 0x00000000;
2295         st->guest_EDX = 0x0c040883;
2296         break;
2297   }
2298}
2299
2300/* Claim to be the following SSSE3-capable CPU (2 x ...):
2301   vendor_id       : GenuineIntel
2302   cpu family      : 6
2303   model           : 15
2304   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2305   stepping        : 6
2306   cpu MHz         : 2394.000
2307   cache size      : 4096 KB
2308   physical id     : 0
2309   siblings        : 2
2310   core id         : 0
2311   cpu cores       : 2
2312   fpu             : yes
2313   fpu_exception   : yes
2314   cpuid level     : 10
2315   wp              : yes
2316   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2317                     mtrr pge mca cmov pat pse36 clflush dts acpi
2318                     mmx fxsr sse sse2 ss ht tm syscall nx lm
2319                     constant_tsc pni monitor ds_cpl vmx est tm2
2320                     cx16 xtpr lahf_lm
2321   bogomips        : 4798.78
2322   clflush size    : 64
2323   cache_alignment : 64
2324   address sizes   : 36 bits physical, 48 bits virtual
2325   power management:
2326*/
2327void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2328{
2329#  define SET_ABCD(_a,_b,_c,_d)               \
2330      do { st->guest_EAX = (UInt)(_a);        \
2331           st->guest_EBX = (UInt)(_b);        \
2332           st->guest_ECX = (UInt)(_c);        \
2333           st->guest_EDX = (UInt)(_d);        \
2334      } while (0)
2335
2336   switch (st->guest_EAX) {
2337      case 0x00000000:
2338         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2339         break;
2340      case 0x00000001:
2341         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2342         break;
2343      case 0x00000002:
2344         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2345         break;
2346      case 0x00000003:
2347         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2348         break;
2349      case 0x00000004: {
2350         switch (st->guest_ECX) {
2351            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2352                                      0x0000003f, 0x00000001); break;
2353            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2354                                      0x0000003f, 0x00000001); break;
2355            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2356                                      0x00000fff, 0x00000001); break;
2357            default:         SET_ABCD(0x00000000, 0x00000000,
2358                                      0x00000000, 0x00000000); break;
2359         }
2360         break;
2361      }
2362      case 0x00000005:
2363         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2364         break;
2365      case 0x00000006:
2366         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2367         break;
2368      case 0x00000007:
2369         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2370         break;
2371      case 0x00000008:
2372         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2373         break;
2374      case 0x00000009:
2375         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2376         break;
2377      case 0x0000000a:
2378      unhandled_eax_value:
2379         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2380         break;
2381      case 0x80000000:
2382         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2383         break;
2384      case 0x80000001:
2385         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
2386         break;
2387      case 0x80000002:
2388         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2389         break;
2390      case 0x80000003:
2391         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2392         break;
2393      case 0x80000004:
2394         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2395         break;
2396      case 0x80000005:
2397         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2398         break;
2399      case 0x80000006:
2400         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2401         break;
2402      case 0x80000007:
2403         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2404         break;
2405      case 0x80000008:
2406         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2407         break;
2408      default:
2409         goto unhandled_eax_value;
2410   }
2411#  undef SET_ABCD
2412}
2413
2414
2415/* CALLED FROM GENERATED CODE */
2416/* DIRTY HELPER (non-referentially-transparent) */
2417/* Horrible hack.  On non-x86 platforms, return 0. */
2418UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
2419{
2420#  if defined(__i386__)
2421   UInt r = 0;
2422   portno &= 0xFFFF;
2423   switch (sz) {
2424      case 4:
2425         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
2426                              : "=a" (r) : "Nd" (portno));
2427	 break;
2428      case 2:
2429         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
2430                              : "=a" (r) : "Nd" (portno));
2431	 break;
2432      case 1:
2433         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
2434                              : "=a" (r) : "Nd" (portno));
2435	 break;
2436      default:
2437         break;
2438   }
2439   return r;
2440#  else
2441   return 0;
2442#  endif
2443}
2444
2445
2446/* CALLED FROM GENERATED CODE */
2447/* DIRTY HELPER (non-referentially-transparent) */
2448/* Horrible hack.  On non-x86 platforms, do nothing. */
2449void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
2450{
2451#  if defined(__i386__)
2452   portno &= 0xFFFF;
2453   switch (sz) {
2454      case 4:
2455         __asm__ __volatile__("outl %0, %w1"
2456                              : : "a" (data), "Nd" (portno));
2457	 break;
2458      case 2:
2459         __asm__ __volatile__("outw %w0, %w1"
2460                              : : "a" (data), "Nd" (portno));
2461	 break;
2462      case 1:
2463         __asm__ __volatile__("outb %b0, %w1"
2464                              : : "a" (data), "Nd" (portno));
2465	 break;
2466      default:
2467         break;
2468   }
2469#  else
2470   /* do nothing */
2471#  endif
2472}
2473
2474/* CALLED FROM GENERATED CODE */
2475/* DIRTY HELPER (non-referentially-transparent) */
2476/* Horrible hack.  On non-x86 platforms, do nothing. */
2477/* op = 0: call the native SGDT instruction.
2478   op = 1: call the native SIDT instruction.
2479*/
2480void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
2481#  if defined(__i386__)
2482   switch (op) {
2483      case 0:
2484         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2485         break;
2486      case 1:
2487         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2488         break;
2489      default:
2490         vpanic("x86g_dirtyhelper_SxDT");
2491   }
2492#  else
2493   /* do nothing */
2494   UChar* p = (UChar*)address;
2495   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2496#  endif
2497}
2498
2499/*---------------------------------------------------------------*/
2500/*--- Helpers for MMX/SSE/SSE2.                               ---*/
2501/*---------------------------------------------------------------*/
2502
2503static inline UChar abdU8 ( UChar xx, UChar yy ) {
2504   return toUChar(xx>yy ? xx-yy : yy-xx);
2505}
2506
2507static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2508   return (((ULong)w1) << 32) | ((ULong)w0);
2509}
2510
2511static inline UShort sel16x4_3 ( ULong w64 ) {
2512   UInt hi32 = toUInt(w64 >> 32);
2513   return toUShort(hi32 >> 16);
2514}
2515static inline UShort sel16x4_2 ( ULong w64 ) {
2516   UInt hi32 = toUInt(w64 >> 32);
2517   return toUShort(hi32);
2518}
2519static inline UShort sel16x4_1 ( ULong w64 ) {
2520   UInt lo32 = toUInt(w64);
2521   return toUShort(lo32 >> 16);
2522}
2523static inline UShort sel16x4_0 ( ULong w64 ) {
2524   UInt lo32 = toUInt(w64);
2525   return toUShort(lo32);
2526}
2527
2528static inline UChar sel8x8_7 ( ULong w64 ) {
2529   UInt hi32 = toUInt(w64 >> 32);
2530   return toUChar(hi32 >> 24);
2531}
2532static inline UChar sel8x8_6 ( ULong w64 ) {
2533   UInt hi32 = toUInt(w64 >> 32);
2534   return toUChar(hi32 >> 16);
2535}
2536static inline UChar sel8x8_5 ( ULong w64 ) {
2537   UInt hi32 = toUInt(w64 >> 32);
2538   return toUChar(hi32 >> 8);
2539}
2540static inline UChar sel8x8_4 ( ULong w64 ) {
2541   UInt hi32 = toUInt(w64 >> 32);
2542   return toUChar(hi32 >> 0);
2543}
2544static inline UChar sel8x8_3 ( ULong w64 ) {
2545   UInt lo32 = toUInt(w64);
2546   return toUChar(lo32 >> 24);
2547}
2548static inline UChar sel8x8_2 ( ULong w64 ) {
2549   UInt lo32 = toUInt(w64);
2550   return toUChar(lo32 >> 16);
2551}
2552static inline UChar sel8x8_1 ( ULong w64 ) {
2553   UInt lo32 = toUInt(w64);
2554   return toUChar(lo32 >> 8);
2555}
2556static inline UChar sel8x8_0 ( ULong w64 ) {
2557   UInt lo32 = toUInt(w64);
2558   return toUChar(lo32 >> 0);
2559}
2560
2561/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2562ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2563{
2564   return
2565      mk32x2(
2566         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2567            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2568         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2569            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2570      );
2571}
2572
2573/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2574ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2575{
2576   UInt t = 0;
2577   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2578   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2579   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2580   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2581   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2582   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2583   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2584   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2585   t &= 0xFFFF;
2586   return (ULong)t;
2587}
2588
2589
2590/*---------------------------------------------------------------*/
2591/*--- Helpers for dealing with segment overrides.             ---*/
2592/*---------------------------------------------------------------*/
2593
2594static inline
2595UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2596{
2597   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2598   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2599   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2600   return (hi << 24) | (mid << 16) | lo;
2601}
2602
2603static inline
2604UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2605{
2606    UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2607    UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2608    UInt limit = (hi << 16) | lo;
2609    if (ent->LdtEnt.Bits.Granularity)
2610       limit = (limit << 12) | 0xFFF;
2611    return limit;
2612}
2613
2614/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2615ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2616                              UInt seg_selector, UInt virtual_addr )
2617{
2618   UInt tiBit, base, limit;
2619   VexGuestX86SegDescr* the_descrs;
2620
2621   Bool verboze = False;
2622
2623   /* If this isn't true, we're in Big Trouble. */
2624   vassert(8 == sizeof(VexGuestX86SegDescr));
2625
2626   if (verboze)
2627      vex_printf("x86h_use_seg_selector: "
2628                 "seg_selector = 0x%x, vaddr = 0x%x\n",
2629                 seg_selector, virtual_addr);
2630
2631   /* Check for wildly invalid selector. */
2632   if (seg_selector & ~0xFFFF)
2633      goto bad;
2634
2635   seg_selector &= 0x0000FFFF;
2636
2637   /* Sanity check the segment selector.  Ensure that RPL=11b (least
2638      privilege).  This forms the bottom 2 bits of the selector. */
2639   if ((seg_selector & 3) != 3)
2640      goto bad;
2641
2642   /* Extract the TI bit (0 means GDT, 1 means LDT) */
2643   tiBit = (seg_selector >> 2) & 1;
2644
2645   /* Convert the segment selector onto a table index */
2646   seg_selector >>= 3;
2647   vassert(seg_selector >= 0 && seg_selector < 8192);
2648
2649   if (tiBit == 0) {
2650
2651      /* GDT access. */
2652      /* Do we actually have a GDT to look at? */
2653      if (gdt == 0)
2654         goto bad;
2655
2656      /* Check for access to non-existent entry. */
2657      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2658         goto bad;
2659
2660      the_descrs = (VexGuestX86SegDescr*)gdt;
2661      base  = get_segdescr_base (&the_descrs[seg_selector]);
2662      limit = get_segdescr_limit(&the_descrs[seg_selector]);
2663
2664   } else {
2665
2666      /* All the same stuff, except for the LDT. */
2667      if (ldt == 0)
2668         goto bad;
2669
2670      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2671         goto bad;
2672
2673      the_descrs = (VexGuestX86SegDescr*)ldt;
2674      base  = get_segdescr_base (&the_descrs[seg_selector]);
2675      limit = get_segdescr_limit(&the_descrs[seg_selector]);
2676
2677   }
2678
2679   /* Do the limit check.  Note, this check is just slightly too
2680      slack.  Really it should be "if (virtual_addr + size - 1 >=
2681      limit)," but we don't have the size info to hand.  Getting it
2682      could be significantly complex.  */
2683   if (virtual_addr >= limit)
2684      goto bad;
2685
2686   if (verboze)
2687      vex_printf("x86h_use_seg_selector: "
2688                 "base = 0x%x, addr = 0x%x\n",
2689                 base, base + virtual_addr);
2690
2691   /* High 32 bits are zero, indicating success. */
2692   return (ULong)( ((UInt)virtual_addr) + base );
2693
2694 bad:
2695   return 1ULL << 32;
2696}
2697
2698
2699/*---------------------------------------------------------------*/
2700/*--- Helpers for dealing with, and describing,               ---*/
2701/*--- guest state as a whole.                                 ---*/
2702/*---------------------------------------------------------------*/
2703
2704/* Initialise the entire x86 guest state. */
2705/* VISIBLE TO LIBVEX CLIENT */
2706void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2707{
2708   vex_state->host_EvC_FAILADDR = 0;
2709   vex_state->host_EvC_COUNTER = 0;
2710
2711   vex_state->guest_EAX = 0;
2712   vex_state->guest_ECX = 0;
2713   vex_state->guest_EDX = 0;
2714   vex_state->guest_EBX = 0;
2715   vex_state->guest_ESP = 0;
2716   vex_state->guest_EBP = 0;
2717   vex_state->guest_ESI = 0;
2718   vex_state->guest_EDI = 0;
2719
2720   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
2721   vex_state->guest_CC_DEP1 = 0;
2722   vex_state->guest_CC_DEP2 = 0;
2723   vex_state->guest_CC_NDEP = 0;
2724   vex_state->guest_DFLAG   = 1; /* forwards */
2725   vex_state->guest_IDFLAG  = 0;
2726   vex_state->guest_ACFLAG  = 0;
2727
2728   vex_state->guest_EIP = 0;
2729
2730   /* Initialise the simulated FPU */
2731   x86g_dirtyhelper_FINIT( vex_state );
2732
2733   /* Initialse the SSE state. */
2734#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2735
2736   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2737   SSEZERO(vex_state->guest_XMM0);
2738   SSEZERO(vex_state->guest_XMM1);
2739   SSEZERO(vex_state->guest_XMM2);
2740   SSEZERO(vex_state->guest_XMM3);
2741   SSEZERO(vex_state->guest_XMM4);
2742   SSEZERO(vex_state->guest_XMM5);
2743   SSEZERO(vex_state->guest_XMM6);
2744   SSEZERO(vex_state->guest_XMM7);
2745
2746#  undef SSEZERO
2747
2748   vex_state->guest_CS  = 0;
2749   vex_state->guest_DS  = 0;
2750   vex_state->guest_ES  = 0;
2751   vex_state->guest_FS  = 0;
2752   vex_state->guest_GS  = 0;
2753   vex_state->guest_SS  = 0;
2754   vex_state->guest_LDT = 0;
2755   vex_state->guest_GDT = 0;
2756
2757   vex_state->guest_EMNOTE = EmNote_NONE;
2758
2759   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2760   vex_state->guest_CMSTART = 0;
2761   vex_state->guest_CMLEN   = 0;
2762
2763   vex_state->guest_NRADDR   = 0;
2764   vex_state->guest_SC_CLASS = 0;
2765   vex_state->guest_IP_AT_SYSCALL = 0;
2766
2767   vex_state->padding1 = 0;
2768}
2769
2770
2771/* Figure out if any part of the guest state contained in minoff
2772   .. maxoff requires precise memory exceptions.  If in doubt return
2773   True (but this generates significantly slower code).
2774
2775   By default we enforce precise exns for guest %ESP, %EBP and %EIP
2776   only.  These are the minimum needed to extract correct stack
2777   backtraces from x86 code.
2778
2779   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2780*/
2781Bool guest_x86_state_requires_precise_mem_exns (
2782        Int minoff, Int maxoff, VexRegisterUpdates pxControl
2783     )
2784{
2785   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2786   Int ebp_max = ebp_min + 4 - 1;
2787   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2788   Int esp_max = esp_min + 4 - 1;
2789   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2790   Int eip_max = eip_min + 4 - 1;
2791
2792   if (maxoff < esp_min || minoff > esp_max) {
2793      /* no overlap with esp */
2794      if (pxControl == VexRegUpdSpAtMemAccess)
2795         return False; // We only need to check stack pointer.
2796   } else {
2797      return True;
2798   }
2799
2800   if (maxoff < ebp_min || minoff > ebp_max) {
2801      /* no overlap with ebp */
2802   } else {
2803      return True;
2804   }
2805
2806   if (maxoff < eip_min || minoff > eip_max) {
2807      /* no overlap with eip */
2808   } else {
2809      return True;
2810   }
2811
2812   return False;
2813}
2814
2815
2816#define ALWAYSDEFD(field)                           \
2817    { offsetof(VexGuestX86State, field),            \
2818      (sizeof ((VexGuestX86State*)0)->field) }
2819
2820VexGuestLayout
2821   x86guest_layout
2822      = {
2823          /* Total size of the guest state, in bytes. */
2824          .total_sizeB = sizeof(VexGuestX86State),
2825
2826          /* Describe the stack pointer. */
2827          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
2828          .sizeof_SP = 4,
2829
2830          /* Describe the frame pointer. */
2831          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
2832          .sizeof_FP = 4,
2833
2834          /* Describe the instruction pointer. */
2835          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
2836          .sizeof_IP = 4,
2837
2838          /* Describe any sections to be regarded by Memcheck as
2839             'always-defined'. */
2840          .n_alwaysDefd = 24,
2841
2842          /* flags thunk: OP and NDEP are always defd, whereas DEP1
2843             and DEP2 have to be tracked.  See detailed comment in
2844             gdefs.h on meaning of thunk fields. */
2845          .alwaysDefd
2846             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
2847                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
2848                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
2849                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
2850                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
2851                 /*  5 */ ALWAYSDEFD(guest_EIP),
2852                 /*  6 */ ALWAYSDEFD(guest_FTOP),
2853                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
2854                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
2855                 /*  9 */ ALWAYSDEFD(guest_FC3210),
2856                 /* 10 */ ALWAYSDEFD(guest_CS),
2857                 /* 11 */ ALWAYSDEFD(guest_DS),
2858                 /* 12 */ ALWAYSDEFD(guest_ES),
2859                 /* 13 */ ALWAYSDEFD(guest_FS),
2860                 /* 14 */ ALWAYSDEFD(guest_GS),
2861                 /* 15 */ ALWAYSDEFD(guest_SS),
2862                 /* 16 */ ALWAYSDEFD(guest_LDT),
2863                 /* 17 */ ALWAYSDEFD(guest_GDT),
2864                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
2865                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
2866                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
2867                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
2868                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
2869                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
2870               }
2871        };
2872
2873
2874/*---------------------------------------------------------------*/
2875/*--- end                                 guest_x86_helpers.c ---*/
2876/*---------------------------------------------------------------*/
2877