guest_x86_helpers.c revision 663860b1408516d02ebfcb3a9999a134e6cfb223

/*---------------------------------------------------------------*/
/*--- begin                               guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emwarn.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for x86 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0


/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
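
/* Editor's note: a minimal sketch (not part of VEX) of how a table with
   exactly this layout could be generated.  PF is set iff the low byte
   of a result has an even number of 1 bits, so entry b is
   X86G_CC_MASK_P exactly when popcount(b) is even.  The printf-based
   generator below is hypothetical and assumes a hosted C environment. */
#if 0
#include <stdio.h>
static void gen_parity_table ( void )
{
   int b, i, ones;
   for (b = 0; b < 256; b++) {
      ones = 0;
      for (i = 0; i < 8; i++)
         ones += (b >> i) & 1;               /* popcount of byte b */
      printf("%s%s", (ones & 1) == 0 ? "X86G_CC_MASK_P" : "0",
                     b == 255 ? "\n" : ", ");
   }
}
#endif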

/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return x << n;
   else
      return x >> (-n);
}
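
/* Editor's note: the negative-count case is what lets the flag macros
   below use a single expression for all operand widths.  For example,
   with DATA_BITS == 32, lshift(res, 8 - DATA_BITS) is a right shift by
   24, which brings the result's sign bit down to bit 7 -- exactly the
   SF position in %eflags.  With DATA_BITS == 8 the same expression is
   a shift by zero, leaving the sign bit already in place. */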

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)					\
   /* const */ UInt DATA_MASK 					\
      = __data_bits==8 ? 0xFF 					\
                       : (__data_bits==16 ? 0xFFFF 		\
                                          : 0xFFFFFFFF); 	\
   /* const */ UInt SIGN_MASK = 1 << (__data_bits - 1);		\
   /* const */ UInt CC_DEP1 = cc_dep1_formal;			\
   /* const */ UInt CC_DEP2 = cc_dep2_formal;			\
   /* const */ UInt CC_NDEP = cc_ndep_formal;			\
   /* Four bogus assignments, which hopefully gcc can     */	\
   /* optimise away, and which stop it complaining about  */	\
   /* unused variables.                                   */	\
   SIGN_MASK = SIGN_MASK;					\
   DATA_MASK = DATA_MASK;					\
   CC_DEP2 = CC_DEP2;						\
   CC_NDEP = CC_NDEP;


/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL + argR;					\
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
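
/* Editor's note: a hypothetical worked example of ACTIONS_ADD, not
   part of VEX.  For an 8-bit add of 0xFF + 0x01: res == 0x100, so
   (UChar)res == 0 and cf == 1; zf == 0x40; parity of 0x00 is even so
   pf is set; the operands have opposite sign bits, so of == 0. */
#if 0
#include <assert.h>
static void demo_add8_flags ( void )
{
   /* For ADD, the thunk holds DEP1 = argL, DEP2 = argR. */
   UInt f = x86g_calculate_eflags_all(X86G_CC_OP_ADDB,
                                      0xFF/*argL*/, 0x01/*argR*/, 0);
   assert(f & X86G_CC_MASK_C);    /* carry out of bit 7 */
   assert(f & X86G_CC_MASK_Z);    /* 8-bit result is zero */
   assert(!(f & X86G_CC_MASK_O)); /* no signed overflow */
}
#endif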

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     argL = CC_DEP1;						\
     argR = CC_DEP2;						\
     res  = argL - argR;					\
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res),	 		\
                 12 - DATA_BITS) & X86G_CC_MASK_O; 		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, oldC, res;		       			\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL + argR) + oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
     else							\
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
                  12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
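
/* Editor's note on the "CC_DEP2 ^ oldC" above: for ADC (and SBB below)
   the front end stores the second operand into the thunk already XORed
   with the old carry bit, and passes the old carry itself in NDEP; the
   helper XORs again to recover the original operand.  The apparent aim
   is to keep the thunk fields' definedness independent of each other,
   which helps tools such as Memcheck, though this file does not spell
   that rationale out. */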

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, oldC, res;		       			\
     oldC = CC_NDEP & X86G_CC_MASK_C;				\
     argL = CC_DEP1;						\
     argR = CC_DEP2 ^ oldC;	       				\
     res  = (argL - argR) - oldC;				\
     if (oldC)							\
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
     else							\
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = lshift((argL ^ argR) & (argL ^ res), 			\
                 12 - DATA_BITS) & X86G_CC_MASK_O;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     cf = 0;							\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0;							\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     of = 0;							\
     return cf | pf | af | zf | sf | of;			\
   }								\
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res - 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
     return cf | pf | af | zf | sf | of;			\
   }								\
}
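
/* Editor's note: the OF test above encodes the one way INC can
   overflow: the result equals SIGN_MASK exactly when the increment
   stepped from the largest positive value to the smallest negative
   one, e.g. an 8-bit INC of 0x7F giving 0x80.  CF is deliberately
   taken from NDEP because INC leaves the carry flag unchanged. */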

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     Int argL, argR, res;					\
     res  = CC_DEP1;						\
     argL = res + 1;						\
     argR = 1;							\
     cf = CC_NDEP & X86G_CC_MASK_C;				\
     pf = parity_table[(UChar)res];				\
     af = (res ^ argL ^ argR) & 0x10;				\
     zf = ((DATA_UTYPE)res == 0) << 6;				\
     sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
     of = ((res & DATA_MASK) 					\
          == ((UInt)SIGN_MASK - 1)) << 11;			\
     return cf | pf | af | zf | sf | of;			\
   }								\
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int cf, pf, af, zf, sf, of;				\
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;	\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);  					\
   { Int cf, pf, af, zf, sf, of;				\
     cf = CC_DEP2 & 1;						\
     pf = parity_table[(UChar)CC_DEP1];				\
     af = 0; /* undefined */					\
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
     /* of is defined if shift count == 1 */			\
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
          & X86G_CC_MASK_O;					\
     return cf | pf | af | zf | sf | of;			\
   }								\
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & CC_DEP1)				\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,  		\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11)));			\
     return fl;							\
   }								\
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
{								\
   PREAMBLE(DATA_BITS);						\
   { Int fl 							\
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))	\
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
          | (X86G_CC_MASK_O & (lshift(CC_DEP1, 			\
                                      11-(DATA_BITS-1)) 	\
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
     return fl;							\
   }								\
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}
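
/* Editor's note: a hypothetical UMUL example, not part of VEX.  For an
   8-bit unsigned multiply 0x10 * 0x10, rr == 0x100, so hi == 0x01 and
   lo == 0x00: CF and OF are both set (the product does not fit in 8
   bits) and zf is computed from the zero low half -- even though the
   hardware formally leaves ZF undefined after MUL, the helper must
   still return some deterministic value for it. */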

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
                     * ((DATA_STYPE)CC_DEP2) );                 \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }								\
}


#if PROFILE_EFLAGS

static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   Char ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */

/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not called directly from generated code; the
   CLEAN HELPER wrappers below are the entry points for that. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
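
/* Editor's note: a minimal sketch (not part of VEX) of how the thunk
   scheme looks from a caller's viewpoint.  After the front end
   translates e.g. "addl %ebx,%eax", the guest state holds
   CC_OP = X86G_CC_OP_ADDL, CC_DEP1 = argL, CC_DEP2 = argR, and the
   concrete %eflags bits are only materialised on demand by a call
   like the one below. */
#if 0
static UInt flags_after_addl ( UInt argL, UInt argR )
{
   /* NDEP is unused for the ADD family, so pass 0. */
   return x86g_calculate_eflags_all(X86G_CC_OP_ADDL, argL, argR, 0);
}
#endif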


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);
         break;

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));
         break;

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));
         break;

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));
         break;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}
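
/* Editor's note: the "inv = cond & 1" trick above relies on the x86
   condition-code encoding, in which each predicate and its negation
   are adjacent and differ only in the bottom bit (O/NO, B/NB, Z/NZ,
   and so on).  Each case therefore handles a pair: it computes the
   positive sense and XORs with the low bit of cond to negate it when
   required. */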


/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= (1<<10);
   if (vex_state->guest_IDFLAG == 1)
      eflags |= (1<<21);
   if (vex_state->guest_ACFLAG == 1)
      eflags |= (1<<18);

   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
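
/* Editor's note: a hypothetical client-side usage sketch, not part of
   VEX.  A LibVEX client that wants to force the guest carry flag set
   could do so as follows; the helper folds the change back into the
   thunk by switching it to the COPY encoding. */
#if 0
static void force_guest_carry ( VexGuestX86State* st )
{
   LibVEX_GuestX86_put_eflag_c(1, st);
   /* st->guest_CC_OP is now X86G_CC_OP_COPY and DEP1 holds the flags */
}
#endif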


/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_x86_spechelper ( HChar*   function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare dst <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
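
/* Editor's note: a concrete illustration of the specialiser, assuming
   the SUBL/CondZ rule above fires.  For guest code
   "cmpl %ebx,%eax ; je target", iropt sees a call
      x86g_calculate_condition(X86CondZ, X86G_CC_OP_SUBL, eax, ebx, ndep)
   and guest_x86_spechelper rewrites it to the much cheaper
      1Uto32(CmpEQ32(eax, ebx))
   with no flag computation left at all. */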


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
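
/* Editor's note: as a concrete instance of both layouts, the value 1.0
   is S=0, exponent 0x3FFF, mantissa 0x8000000000000000 in the 80-bit
   format (the leading 1 is explicit), and 0x3FF0000000000000 as a
   64-bit double (the leading 1 is implicit). */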
1364
1365/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1366/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1367UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
1368{
1369   Bool   mantissaIsZero;
1370   Int    bexp;
1371   UChar  sign;
1372   UChar* f64;
1373
1374   vassert(host_is_little_endian());
1375
1376   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1377
1378   f64  = (UChar*)(&dbl);
1379   sign = toUChar( (f64[7] >> 7) & 1 );
1380
1381   /* First off, if the tag indicates the register was empty,
1382      return 1,0,sign,1 */
1383   if (tag == 0) {
1384      /* vex_printf("Empty\n"); */
1385      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
1386                                 | X86G_FC_MASK_C0;
1387   }
1388
1389   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1390   bexp &= 0x7FF;
1391
1392   mantissaIsZero
1393      = toBool(
1394           (f64[6] & 0x0F) == 0
1395           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1396        );
1397
1398   /* If both exponent and mantissa are zero, the value is zero.
1399      Return 1,0,sign,0. */
1400   if (bexp == 0 && mantissaIsZero) {
1401      /* vex_printf("Zero\n"); */
1402      return X86G_FC_MASK_C3 | 0
1403                             | (sign << X86G_FC_SHIFT_C1) | 0;
1404   }
1405
1406   /* If exponent is zero but mantissa isn't, it's a denormal.
1407      Return 1,1,sign,0. */
1408   if (bexp == 0 && !mantissaIsZero) {
1409      /* vex_printf("Denormal\n"); */
1410      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
1411                             | (sign << X86G_FC_SHIFT_C1) | 0;
1412   }
1413
1414   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1415      Return 0,1,sign,1. */
1416   if (bexp == 0x7FF && mantissaIsZero) {
1417      /* vex_printf("Inf\n"); */
1418      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
1419                                 | X86G_FC_MASK_C0;
1420   }
1421
1422   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1423      Return 0,0,sign,1. */
1424   if (bexp == 0x7FF && !mantissaIsZero) {
1425      /* vex_printf("NaN\n"); */
1426      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
1427   }
1428
1429   /* Uh, ok, we give up.  It must be a normal finite number.
1430      Return 0,1,sign,0.
1431   */
1432   /* vex_printf("normal\n"); */
1433   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
1434}
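
/* Worked example (illustrative): for a non-empty register (tag == 1)
   holding -0.0, whose bit pattern is 0x8000000000000000ULL,

      UInt fsw = x86g_calculate_FXAM(1, 0x8000000000000000ULL);

   takes the 'Zero' path above with sign == 1, so the returned value
   has C3=1, C2=0, C1=1 (the sign bit) and C0=0. */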
1435
1436
1437/* CALLED FROM GENERATED CODE */
1438/* DIRTY HELPER (reads guest memory) */
1439ULong x86g_dirtyhelper_loadF80le ( UInt addrU )
1440{
1441   ULong f64;
1442   convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
1443   return f64;
1444}
1445
1446/* CALLED FROM GENERATED CODE */
1447/* DIRTY HELPER (writes guest memory) */
1448void x86g_dirtyhelper_storeF80le ( UInt addrU, ULong f64 )
1449{
1450   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
1451}
1452
1453
1454/*----------------------------------------------*/
1455/*--- The exported fns ..                    ---*/
1456/*----------------------------------------------*/
1457
1458/* Layout of the real x87 state. */
1459/* 13 June 05: Fpu_State and auxiliary constants were moved to
1460   guest_generic_x87.h */
1461
1462
1463/* CLEAN HELPER */
1464/* fpucw[15:0] contains an x87 native format FPU control word.
1465   Extract from it the required FPROUND value and any resulting
1466   emulation warning, and return (warn << 32) | fpround value.
1467*/
1468ULong x86g_check_fldcw ( UInt fpucw )
1469{
1470   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1471   /* NOTE, encoded exactly as per enum IRRoundingMode. */
1472   UInt rmode = (fpucw >> 10) & 3;
1473
1474   /* Detect any required emulation warnings. */
1475   VexEmWarn ew = EmWarn_NONE;
1476
1477   if ((fpucw & 0x3F) != 0x3F) {
1478      /* unmasked exceptions! */
1479      ew = EmWarn_X86_x87exns;
1480   }
1481   else
1482   if (((fpucw >> 8) & 3) != 3) {
1483      /* unsupported precision */
1484      ew = EmWarn_X86_x87precision;
1485   }
1486
1487   return (((ULong)ew) << 32) | ((ULong)rmode);
1488}
1489
1490/* CLEAN HELPER */
1491/* Given fpround as an IRRoundingMode value, create a suitable x87
1492   native format FPU control word. */
1493UInt x86g_create_fpucw ( UInt fpround )
1494{
1495   fpround &= 3;
1496   return 0x037F | (fpround << 10);
1497}
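
/* Worked example (illustrative): for the power-up control word 0x037F
   (all six exceptions masked, 64-bit precision, round to nearest):

      ULong     pair    = x86g_check_fldcw(0x037F);
      UInt      fpround = (UInt)pair;               -- 0, Irrm_NEAREST
      VexEmWarn ew      = (VexEmWarn)(pair >> 32);  -- EmWarn_NONE

   Conversely, x86g_create_fpucw(Irrm_NEAREST) returns exactly 0x037F,
   so the two functions round-trip. */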
1498
1499
1500/* CLEAN HELPER */
1501/* mxcsr[15:0] contains an SSE native format MXCSR value.
1502   Extract from it the required SSEROUND value and any resulting
1503   emulation warning, and return (warn << 32) | sseround value.
1504*/
1505ULong x86g_check_ldmxcsr ( UInt mxcsr )
1506{
1507   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1508   /* NOTE, encoded exactly as per enum IRRoundingMode. */
1509   UInt rmode = (mxcsr >> 13) & 3;
1510
1511   /* Detect any required emulation warnings. */
1512   VexEmWarn ew = EmWarn_NONE;
1513
1514   if ((mxcsr & 0x1F80) != 0x1F80) {
1515      /* unmasked exceptions! */
1516      ew = EmWarn_X86_sseExns;
1517   }
1518   else
1519   if (mxcsr & (1<<15)) {
1520      /* FZ is set */
1521      ew = EmWarn_X86_fz;
1522   }
1523   else
1524   if (mxcsr & (1<<6)) {
1525      /* DAZ is set */
1526      ew = EmWarn_X86_daz;
1527   }
1528
1529   return (((ULong)ew) << 32) | ((ULong)rmode);
1530}
1531
1532
1533/* CLEAN HELPER */
1534/* Given sseround as an IRRoundingMode value, create a suitable SSE
1535   native format MXCSR value. */
1536UInt x86g_create_mxcsr ( UInt sseround )
1537{
1538   sseround &= 3;
1539   return 0x1F80 | (sseround << 13);
1540}
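
/* Worked example (illustrative): the MXCSR reset value 0x1F80 masks
   all six SSE exceptions, clears FZ/DAZ and selects round to nearest,
   so x86g_check_ldmxcsr(0x1F80) returns 0 -- EmWarn_NONE in the top
   half, Irrm_NEAREST in the bottom half -- and
   x86g_create_mxcsr(Irrm_NEAREST) reproduces 0x1F80. */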
1541
1542
1543/* CALLED FROM GENERATED CODE */
1544/* DIRTY HELPER (writes guest state) */
1545/* Initialise the x87 FPU state as per 'finit'. */
1546void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1547{
1548   Int i;
1549   gst->guest_FTOP = 0;
1550   for (i = 0; i < 8; i++) {
1551      gst->guest_FPTAG[i] = 0; /* empty */
1552      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1553   }
1554   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1555   gst->guest_FC3210  = 0;
1556}
1557
1558
1559/* This is used to implement both 'frstor' and 'fldenv'.  The latter
1560   appears to differ from the former only in that the 8 FP registers
1561   themselves are not transferred into the guest state. */
1562static
1563VexEmWarn do_put_x87 ( Bool moveRegs,
1564                       /*IN*/UChar* x87_state,
1565                       /*OUT*/VexGuestX86State* vex_state )
1566{
1567   Int        stno, preg;
1568   UInt       tag;
1569   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1570   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1571   Fpu_State* x87     = (Fpu_State*)x87_state;
1572   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1573   UInt       tagw    = x87->env[FP_ENV_TAG];
1574   UInt       fpucw   = x87->env[FP_ENV_CTRL];
1575   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1576   VexEmWarn  ew;
1577   UInt       fpround;
1578   ULong      pair;
1579
1580   /* Copy registers and tags */
1581   for (stno = 0; stno < 8; stno++) {
1582      preg = (stno + ftop) & 7;
1583      tag = (tagw >> (2*preg)) & 3;
1584      if (tag == 3) {
1585         /* register is empty */
1586         /* hmm, if it's empty, does it still get written?  Probably
1587            safer to say it does.  If we don't, memcheck could get out
1588            of sync, in that it thinks all FP registers are defined by
1589            this helper, but in reality some have not been updated. */
1590         if (moveRegs)
1591            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1592         vexTags[preg] = 0;
1593      } else {
1594         /* register is non-empty */
1595         if (moveRegs)
1596            convert_f80le_to_f64le( &x87->reg[10*stno],
1597                                    (UChar*)&vexRegs[preg] );
1598         vexTags[preg] = 1;
1599      }
1600   }
1601
1602   /* stack pointer */
1603   vex_state->guest_FTOP = ftop;
1604
1605   /* status word */
1606   vex_state->guest_FC3210 = c3210;
1607
1608   /* handle the control word, setting FPROUND and detecting any
1609      emulation warnings. */
1610   pair    = x86g_check_fldcw ( (UInt)fpucw );
1611   fpround = (UInt)pair;
1612   ew      = (VexEmWarn)(pair >> 32);
1613
1614   vex_state->guest_FPROUND = fpround & 3;
1615
1616   /* emulation warnings --> caller */
1617   return ew;
1618}
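
/* Illustrative note on the stno/preg mapping above: with ftop == 6,
   ST(0) is physical register 6, ST(1) is 7, ST(2) is 0, and so on
   round to ST(7) in physical register 5, since
   preg = (stno + ftop) & 7.  A tag word of 0xFFFF (all fields 3)
   marks every register empty, whatever ftop is. */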
1619
1620
1621/* Create an x87 FPU state from the guest state, as close as
1622   we can approximate it. */
1623static
1624void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1625                  /*OUT*/UChar* x87_state )
1626{
1627   Int        i, stno, preg;
1628   UInt       tagw;
1629   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1630   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1631   Fpu_State* x87     = (Fpu_State*)x87_state;
1632   UInt       ftop    = vex_state->guest_FTOP;
1633   UInt       c3210   = vex_state->guest_FC3210;
1634
1635   for (i = 0; i < 14; i++)
1636      x87->env[i] = 0;
1637
1638   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1639   x87->env[FP_ENV_STAT]
1640      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1641   x87->env[FP_ENV_CTRL]
1642      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1643
1644   /* Dump the register stack in ST order. */
1645   tagw = 0;
1646   for (stno = 0; stno < 8; stno++) {
1647      preg = (stno + ftop) & 7;
1648      if (vexTags[preg] == 0) {
1649         /* register is empty, but dump its contents anyway */
1650         tagw |= (3 << (2*preg));
1651         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1652                                 &x87->reg[10*stno] );
1653      } else {
1654         /* register is non-empty */
1655         tagw |= (0 << (2*preg));
1656         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1657                                 &x87->reg[10*stno] );
1658      }
1659   }
1660   x87->env[FP_ENV_TAG] = toUShort(tagw);
1661}
1662
1663
1664/* CALLED FROM GENERATED CODE */
1665/* DIRTY HELPER (reads guest state, writes guest mem) */
1666void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
1667{
1668   /* Somewhat roundabout, but at least it's simple. */
1669   Fpu_State tmp;
1670   UShort*   addrS = (UShort*)addr;
1671   UChar*    addrC = (UChar*)addr;
1672   U128*     xmm   = (U128*)(addr + 160);
1673   UInt      mxcsr;
1674   UShort    fp_tags;
1675   UInt      summary_tags;
1676   Int       r, stno;
1677   UShort    *srcS, *dstS;
1678
1679   do_get_x87( gst, (UChar*)&tmp );
1680   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
1681
1682   /* Now build the proper fxsave image from the x87 image we just
1683      made. */
1684
1685   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1686   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1687
1688   /* set addrS[2] in an endian-independent way */
1689   summary_tags = 0;
1690   fp_tags = tmp.env[FP_ENV_TAG];
1691   for (r = 0; r < 8; r++) {
1692      if ( ((fp_tags >> (2*r)) & 3) != 3 )
1693         summary_tags |= (1 << r);
1694   }
1695   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1696   addrC[5]  = 0; /* pad */
1697
1698   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
1699   addrS[4]  = 0;
1700   addrS[5]  = 0; /* FPU IP (bogus) */
1701   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
1702                     could conceivably dump %CS here) */
1703
1704   addrS[7]  = 0; /* Intel reserved */
1705
1706   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
1707   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
1708   addrS[10] = 0; /* segment selector for above operand pointer; %DS
1709                     perhaps? */
1710   addrS[11] = 0; /* Intel reserved */
1711
1712   addrS[12] = toUShort(mxcsr);  /* MXCSR */
1713   addrS[13] = toUShort(mxcsr >> 16);
1714
1715   addrS[14] = 0xFFFF; /* MXCSR_MASK (lo16): claim all bits writable */
1716   addrS[15] = 0xFFFF; /* MXCSR_MASK (hi16): claim all bits writable */
1717
1718   /* Copy in the FP registers, in ST order. */
1719   for (stno = 0; stno < 8; stno++) {
1720      srcS = (UShort*)(&tmp.reg[10*stno]);
1721      dstS = (UShort*)(&addrS[16 + 8*stno]);
1722      dstS[0] = srcS[0];
1723      dstS[1] = srcS[1];
1724      dstS[2] = srcS[2];
1725      dstS[3] = srcS[3];
1726      dstS[4] = srcS[4];
1727      dstS[5] = 0;
1728      dstS[6] = 0;
1729      dstS[7] = 0;
1730   }
1731
1732   /* That's the first 160 bytes of the image done.  Now only %xmm0
1733      .. %xmm7 remain to be copied.  If the host is big-endian, these
1734      need to be byte-swapped. */
1735   vassert(host_is_little_endian());
1736
1737#  define COPY_U128(_dst,_src)                       \
1738      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1739           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1740      while (0)
1741
1742   COPY_U128( xmm[0], gst->guest_XMM0 );
1743   COPY_U128( xmm[1], gst->guest_XMM1 );
1744   COPY_U128( xmm[2], gst->guest_XMM2 );
1745   COPY_U128( xmm[3], gst->guest_XMM3 );
1746   COPY_U128( xmm[4], gst->guest_XMM4 );
1747   COPY_U128( xmm[5], gst->guest_XMM5 );
1748   COPY_U128( xmm[6], gst->guest_XMM6 );
1749   COPY_U128( xmm[7], gst->guest_XMM7 );
1750
1751#  undef COPY_U128
1752}
1753
1754
1755/* CALLED FROM GENERATED CODE */
1756/* DIRTY HELPER (writes guest state, reads guest mem) */
1757VexEmWarn x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
1758{
1759   Fpu_State tmp;
1760   VexEmWarn warnX87 = EmWarn_NONE;
1761   VexEmWarn warnXMM = EmWarn_NONE;
1762   UShort*   addrS   = (UShort*)addr;
1763   UChar*    addrC   = (UChar*)addr;
1764   U128*     xmm     = (U128*)(addr + 160);
1765   UShort    fp_tags;
1766   Int       r, stno, i;
1767
1768   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
1769      to be byte-swapped. */
1770   vassert(host_is_little_endian());
1771
1772#  define COPY_U128(_dst,_src)                       \
1773      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1774           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1775      while (0)
1776
1777   COPY_U128( gst->guest_XMM0, xmm[0] );
1778   COPY_U128( gst->guest_XMM1, xmm[1] );
1779   COPY_U128( gst->guest_XMM2, xmm[2] );
1780   COPY_U128( gst->guest_XMM3, xmm[3] );
1781   COPY_U128( gst->guest_XMM4, xmm[4] );
1782   COPY_U128( gst->guest_XMM5, xmm[5] );
1783   COPY_U128( gst->guest_XMM6, xmm[6] );
1784   COPY_U128( gst->guest_XMM7, xmm[7] );
1785
1786#  undef COPY_U128
1787
1788   /* Copy the x87 registers out of the image, into a temporary
1789      Fpu_State struct. */
1790
1791   /* LLVM on Darwin turns the following loop into a movaps plus a
1792      handful of scalar stores.  This would work fine except for the
1793      fact that VEX doesn't keep the stack correctly (16-) aligned for
1794      the call, so it segfaults.  Hence, split the loop into two
1795      pieces (and pray LLVM doesn't merely glue them back together) so
1796      it's composed only of scalar stores and so is alignment
1797      insensitive.  Of course this is a kludge of the lamest kind --
1798      VEX should be fixed properly. */
1799   /* Code that seems to trigger the problem:
1800      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
1801   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
1802   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;
1803
1804   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1805   /* fill in tmp.reg[0..7] */
1806   for (stno = 0; stno < 8; stno++) {
1807      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1808      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1809      dstS[0] = srcS[0];
1810      dstS[1] = srcS[1];
1811      dstS[2] = srcS[2];
1812      dstS[3] = srcS[3];
1813      dstS[4] = srcS[4];
1814   }
1815   /* fill in tmp.env[0..13] */
1816   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1817   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
1818
1819   fp_tags = 0;
1820   for (r = 0; r < 8; r++) {
1821      if (addrC[4] & (1<<r))
1822         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
1823      else
1824         fp_tags |= (3 << (2*r)); /* EMPTY */
1825   }
1826   tmp.env[FP_ENV_TAG] = fp_tags;
1827
1828   /* Now write 'tmp' into the guest state. */
1829   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
1830
1831   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1832                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1833     ULong w64 = x86g_check_ldmxcsr( w32 );
1834
1835     warnXMM = (VexEmWarn)(w64 >> 32);
1836
1837     gst->guest_SSEROUND = (UInt)w64;
1838   }
1839
1840   /* Prefer an X87 emwarn over an XMM one, if both exist. */
1841   if (warnX87 != EmWarn_NONE)
1842      return warnX87;
1843   else
1844      return warnXMM;
1845}
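
/* Sanity sketch (illustrative only; 'gst' stands for some guest
   state, 'image' for 512 bytes of scratch memory):

      UChar image[512];
      x86g_dirtyhelper_FXSAVE ( &gst, (HWord)image );
      (void) x86g_dirtyhelper_FXRSTOR ( &gst, (HWord)image );

   should leave the x87/SSE guest state unchanged, except that the
   reconstructed tag word only distinguishes empty from valid, as
   noted above. */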
1846
1847
1848/* CALLED FROM GENERATED CODE */
1849/* DIRTY HELPER (reads guest state, writes guest mem) */
1850void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1851{
1852   do_get_x87( gst, (UChar*)addr );
1853}
1854
1855/* CALLED FROM GENERATED CODE */
1856/* DIRTY HELPER (writes guest state, reads guest mem) */
1857VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1858{
1859   return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
1860}
1861
1862/* CALLED FROM GENERATED CODE */
1863/* DIRTY HELPER (reads guest state, writes guest mem) */
1864void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1865{
1866   /* Somewhat roundabout, but at least it's simple. */
1867   Int       i;
1868   UShort*   addrP = (UShort*)addr;
1869   Fpu_State tmp;
1870   do_get_x87( gst, (UChar*)&tmp );
1871   for (i = 0; i < 14; i++)
1872      addrP[i] = tmp.env[i];
1873}
1874
1875/* CALLED FROM GENERATED CODE */
1876/* DIRTY HELPER (writes guest state, reads guest mem) */
1877VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1878{
1879   return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
1880}
1881
1882
1883/*---------------------------------------------------------------*/
1884/*--- Misc integer helpers, including rotates and CPUID.      ---*/
1885/*---------------------------------------------------------------*/
1886
1887/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1888/* Calculate both flags and value result for rotate right
1889   through the carry bit.  Result in low 32 bits,
1890   new flags (OSZACP) in high 32 bits.
1891*/
1892ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1893{
1894   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1895
1896   switch (sz) {
1897      case 4:
1898         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1899         of        = ((arg >> 31) ^ cf) & 1;
1900         while (tempCOUNT > 0) {
1901            tempcf = arg & 1;
1902            arg    = (arg >> 1) | (cf << 31);
1903            cf     = tempcf;
1904            tempCOUNT--;
1905         }
1906         break;
1907      case 2:
1908         while (tempCOUNT >= 17) tempCOUNT -= 17;
1909         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1910         of        = ((arg >> 15) ^ cf) & 1;
1911         while (tempCOUNT > 0) {
1912            tempcf = arg & 1;
1913            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
1914            cf     = tempcf;
1915            tempCOUNT--;
1916         }
1917         break;
1918      case 1:
1919         while (tempCOUNT >= 9) tempCOUNT -= 9;
1920         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1921         of        = ((arg >> 7) ^ cf) & 1;
1922         while (tempCOUNT > 0) {
1923            tempcf = arg & 1;
1924            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
1925            cf     = tempcf;
1926            tempCOUNT--;
1927         }
1928         break;
1929      default:
1930         vpanic("calculate_RCR: invalid size");
1931   }
1932
1933   cf &= 1;
1934   of &= 1;
1935   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
1936   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
1937
1938   return (((ULong)eflags_in) << 32) | ((ULong)arg);
1939}
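
/* Worked example (illustrative): a 1-bit RCR of the byte 0x01 with
   all flags, hence CF, clear on entry:

      ULong r = x86g_calculate_RCR ( 0x01, 1, 0, 1 );

   gives result byte 0x00 in the low 32 bits (the old CF, 0, lands in
   bit 7) and, in the high 32 bits, CF=1 (the old bit 0) and OF=0. */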
1940
1941
1942/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1943/* Calculate both flags and value result for rotate left
1944   through the carry bit.  Result in low 32 bits,
1945   new flags (OSZACP) in high 32 bits.
1946*/
1947ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1948{
1949   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1950
1951   switch (sz) {
1952      case 4:
1953         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1954         while (tempCOUNT > 0) {
1955            tempcf = (arg >> 31) & 1;
1956            arg    = (arg << 1) | (cf & 1);
1957            cf     = tempcf;
1958            tempCOUNT--;
1959         }
1960         of = ((arg >> 31) ^ cf) & 1;
1961         break;
1962      case 2:
1963         while (tempCOUNT >= 17) tempCOUNT -= 17;
1964         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1965         while (tempCOUNT > 0) {
1966            tempcf = (arg >> 15) & 1;
1967            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
1968            cf     = tempcf;
1969            tempCOUNT--;
1970         }
1971         of = ((arg >> 15) ^ cf) & 1;
1972         break;
1973      case 1:
1974         while (tempCOUNT >= 9) tempCOUNT -= 9;
1975         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1976         while (tempCOUNT > 0) {
1977            tempcf = (arg >> 7) & 1;
1978            arg    = 0xFF & ((arg << 1) | (cf & 1));
1979            cf     = tempcf;
1980            tempCOUNT--;
1981         }
1982         of = ((arg >> 7) ^ cf) & 1;
1983         break;
1984      default:
1985         vpanic("calculate_RCL: invalid size");
1986   }
1987
1988   cf &= 1;
1989   of &= 1;
1990   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
1991   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
1992
1993   return (((ULong)eflags_in) << 32) | ((ULong)arg);
1994}
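
/* Worked example (illustrative): a 1-bit RCL of the byte 0x80 with
   all flags clear on entry:

      ULong r = x86g_calculate_RCL ( 0x80, 1, 0, 1 );

   gives result byte 0x00 (the old CF, 0, enters at bit 0), with CF=1
   (the old bit 7) and OF=1, since the new top bit and the new CF
   differ. */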
1995
1996
1997/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1998/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
1999   AX value in low half of arg, OSZACP in upper half.
2000   See guest-x86/toIR.c usage point for details.
2001*/
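
/* Compute the x86 parity flag for the low 8 bits of w32: 1 if the
   number of set bits is even, 0 if odd. */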
2002static UInt calc_parity_8bit ( UInt w32 ) {
2003   UInt i;
2004   UInt p = 1;
2005   for (i = 0; i < 8; i++)
2006      p ^= (1 & (w32 >> i));
2007   return p;
2008}
2009UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
2010{
2011   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2012   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2013   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2014   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2015   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2016   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2017   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2018   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2019   UInt result = 0;
2020
2021   switch (opcode) {
2022      case 0x27: { /* DAA */
2023         UInt old_AL = r_AL;
2024         UInt old_C  = r_C;
2025         r_C = 0;
2026         if ((r_AL & 0xF) > 9 || r_A == 1) {
2027            r_AL = r_AL + 6;
2028            r_C  = old_C;
2029            if (r_AL >= 0x100) r_C = 1;
2030            r_A = 1;
2031         } else {
2032            r_A = 0;
2033         }
2034         if (old_AL > 0x99 || old_C == 1) {
2035            r_AL = r_AL + 0x60;
2036            r_C  = 1;
2037         } else {
2038            r_C = 0;
2039         }
2040         /* O is undefined.  S Z and P are set according to the
2041            result. */
2042         r_AL &= 0xFF;
2043         r_O = 0; /* let's say */
2044         r_S = (r_AL & 0x80) ? 1 : 0;
2045         r_Z = (r_AL == 0) ? 1 : 0;
2046         r_P = calc_parity_8bit( r_AL );
2047         break;
2048      }
2049      case 0x2F: { /* DAS */
2050         UInt old_AL = r_AL;
2051         UInt old_C  = r_C;
2052         r_C = 0;
2053         if ((r_AL & 0xF) > 9 || r_A == 1) {
2054            Bool borrow = r_AL < 6;
2055            r_AL = r_AL - 6;
2056            r_C  = old_C;
2057            if (borrow) r_C = 1;
2058            r_A = 1;
2059         } else {
2060            r_A = 0;
2061         }
2062         if (old_AL > 0x99 || old_C == 1) {
2063            r_AL = r_AL - 0x60;
2064            r_C  = 1;
2065         } else {
2066            /* Intel docs are wrong: r_C = 0; */
2067         }
2068         /* O is undefined.  S Z and P are set according to the
2069            result. */
2070         r_AL &= 0xFF;
2071         r_O = 0; /* let's say */
2072         r_S = (r_AL & 0x80) ? 1 : 0;
2073         r_Z = (r_AL == 0) ? 1 : 0;
2074         r_P = calc_parity_8bit( r_AL );
2075         break;
2076      }
2077      case 0x37: { /* AAA */
2078         Bool nudge = r_AL > 0xF9;
2079         if ((r_AL & 0xF) > 9 || r_A == 1) {
2080            r_AL = r_AL + 6;
2081            r_AH = r_AH + 1 + (nudge ? 1 : 0);
2082            r_A  = 1;
2083            r_C  = 1;
2084            r_AL = r_AL & 0xF;
2085         } else {
2086            r_A  = 0;
2087            r_C  = 0;
2088            r_AL = r_AL & 0xF;
2089         }
2090         /* O S Z and P are undefined. */
2091         r_O = r_S = r_Z = r_P = 0; /* let's say */
2092         break;
2093      }
2094      case 0x3F: { /* AAS */
2095         Bool nudge = r_AL < 0x06;
2096         if ((r_AL & 0xF) > 9 || r_A == 1) {
2097            r_AL = r_AL - 6;
2098            r_AH = r_AH - 1 - (nudge ? 1 : 0);
2099            r_A  = 1;
2100            r_C  = 1;
2101            r_AL = r_AL & 0xF;
2102         } else {
2103            r_A  = 0;
2104            r_C  = 0;
2105            r_AL = r_AL & 0xF;
2106         }
2107         /* O S Z and P are undefined. */
2108         r_O = r_S = r_Z = r_P = 0; /* let's say */
2109         break;
2110      }
2111      default:
2112         vassert(0);
2113   }
2114   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2115            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2116            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2117            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2118            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2119            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2120            | ( (r_AH & 0xFF) << 8 )
2121            | ( (r_AL & 0xFF) << 0 );
2122   return result;
2123}
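
/* Worked example (illustrative): adding packed BCD 0x19 + 0x28 leaves
   AL == 0x41 with AF set.  Handing that to DAA:

      UInt flagsAX = ((1 << X86G_CC_SHIFT_A) << 16) | 0x0041;
      UInt res     = x86g_calculate_daa_das_aaa_aas ( flagsAX, 0x27 );

   yields AL == 0x47 (res & 0xFF), the correct packed-BCD sum, with
   CF clear and AF set in the flag bits at res[31:16]. */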
2124
2125UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
2126{
2127   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2128   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2129   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2130   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2131   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2132   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2133   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2134   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2135   UInt result = 0;
2136
2137   switch (opcode) {
2138      case 0xD4: { /* AAM */
2139         r_AH = r_AL / 10;
2140         r_AL = r_AL % 10;
2141         break;
2142      }
2143      case 0xD5: { /* AAD */
2144         r_AL = ((r_AH * 10) + r_AL) & 0xff;
2145         r_AH = 0;
2146         break;
2147      }
2148      default:
2149         vassert(0);
2150   }
2151
2152   r_O = 0; /* let's say (undefined) */
2153   r_C = 0; /* let's say (undefined) */
2154   r_A = 0; /* let's say (undefined) */
2155   r_S = (r_AL & 0x80) ? 1 : 0;
2156   r_Z = (r_AL == 0) ? 1 : 0;
2157   r_P = calc_parity_8bit( r_AL );
2158
2159   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2160            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2161            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2162            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2163            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2164            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2165            | ( (r_AH & 0xFF) << 8 )
2166            | ( (r_AL & 0xFF) << 0 );
2167   return result;
2168}
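
/* Worked example (illustrative): AAM with AL == 53 (0x35) and all
   flags clear:

      UInt res = x86g_calculate_aad_aam ( 0x0035, 0xD4 );

   splits AL into its base-10 digits: AH == 5, AL == 3 (res bits 15:8
   and 7:0).  SF and ZF come out clear; PF is set, 3 having an even
   number of set bits. */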
2169
2170
2171/* CALLED FROM GENERATED CODE */
2172/* DIRTY HELPER (non-referentially-transparent) */
2173/* Horrible hack.  On non-x86 platforms, return 1. */
2174ULong x86g_dirtyhelper_RDTSC ( void )
2175{
2176#  if defined(__i386__)
2177   ULong res;
2178   __asm__ __volatile__("rdtsc" : "=A" (res));
2179   return res;
2180#  else
2181   return 1ULL;
2182#  endif
2183}
2184
2185
2186/* CALLED FROM GENERATED CODE */
2187/* DIRTY HELPER (modifies guest state) */
2188/* Claim to be a P55C (Intel Pentium/MMX) */
2189void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2190{
2191   switch (st->guest_EAX) {
2192      case 0:
2193         st->guest_EAX = 0x1;
2194         st->guest_EBX = 0x756e6547;
2195         st->guest_ECX = 0x6c65746e;
2196         st->guest_EDX = 0x49656e69;
2197         break;
2198      default:
2199         st->guest_EAX = 0x543;
2200         st->guest_EBX = 0x0;
2201         st->guest_ECX = 0x0;
2202         st->guest_EDX = 0x8001bf;
2203         break;
2204   }
2205}
2206
2207/* CALLED FROM GENERATED CODE */
2208/* DIRTY HELPER (modifies guest state) */
2209/* Claim to be the following SSE1-capable CPU:
2210   vendor_id       : GenuineIntel
2211   cpu family      : 6
2212   model           : 11
2213   model name      : Intel(R) Pentium(R) III CPU family      1133MHz
2214   stepping        : 1
2215   cpu MHz         : 1131.013
2216   cache size      : 512 KB
2217*/
2218void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2219{
2220   switch (st->guest_EAX) {
2221      case 0:
2222         st->guest_EAX = 0x00000002;
2223         st->guest_EBX = 0x756e6547;
2224         st->guest_ECX = 0x6c65746e;
2225         st->guest_EDX = 0x49656e69;
2226         break;
2227      case 1:
2228         st->guest_EAX = 0x000006b1;
2229         st->guest_EBX = 0x00000004;
2230         st->guest_ECX = 0x00000000;
2231         st->guest_EDX = 0x0383fbff;
2232         break;
2233      default:
2234         st->guest_EAX = 0x03020101;
2235         st->guest_EBX = 0x00000000;
2236         st->guest_ECX = 0x00000000;
2237         st->guest_EDX = 0x0c040883;
2238         break;
2239   }
2240}
2241
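/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */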
2242/* Claim to be the following SSSE3-capable CPU (2 x ...):
2243   vendor_id       : GenuineIntel
2244   cpu family      : 6
2245   model           : 15
2246   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2247   stepping        : 6
2248   cpu MHz         : 2394.000
2249   cache size      : 4096 KB
2250   physical id     : 0
2251   siblings        : 2
2252   core id         : 0
2253   cpu cores       : 2
2254   fpu             : yes
2255   fpu_exception   : yes
2256   cpuid level     : 10
2257   wp              : yes
2258   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2259                     mtrr pge mca cmov pat pse36 clflush dts acpi
2260                     mmx fxsr sse sse2 ss ht tm syscall nx lm
2261                     constant_tsc pni monitor ds_cpl vmx est tm2
2262                     cx16 xtpr lahf_lm
2263   bogomips        : 4798.78
2264   clflush size    : 64
2265   cache_alignment : 64
2266   address sizes   : 36 bits physical, 48 bits virtual
2267   power management:
2268*/
2269void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2270{
2271#  define SET_ABCD(_a,_b,_c,_d)               \
2272      do { st->guest_EAX = (UInt)(_a);        \
2273           st->guest_EBX = (UInt)(_b);        \
2274           st->guest_ECX = (UInt)(_c);        \
2275           st->guest_EDX = (UInt)(_d);        \
2276      } while (0)
2277
2278   switch (st->guest_EAX) {
2279      case 0x00000000:
2280         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2281         break;
2282      case 0x00000001:
2283         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2284         break;
2285      case 0x00000002:
2286         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2287         break;
2288      case 0x00000003:
2289         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2290         break;
2291      case 0x00000004: {
2292         switch (st->guest_ECX) {
2293            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2294                                      0x0000003f, 0x00000001); break;
2295            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2296                                      0x0000003f, 0x00000001); break;
2297            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2298                                      0x00000fff, 0x00000001); break;
2299            default:         SET_ABCD(0x00000000, 0x00000000,
2300                                      0x00000000, 0x00000000); break;
2301         }
2302         break;
2303      }
2304      case 0x00000005:
2305         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2306         break;
2307      case 0x00000006:
2308         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2309         break;
2310      case 0x00000007:
2311         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2312         break;
2313      case 0x00000008:
2314         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2315         break;
2316      case 0x00000009:
2317         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2318         break;
2319      case 0x0000000a:
2320      unhandled_eax_value:
2321         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2322         break;
2323      case 0x80000000:
2324         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2325         break;
2326      case 0x80000001:
2327         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
2328         break;
2329      case 0x80000002:
2330         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2331         break;
2332      case 0x80000003:
2333         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2334         break;
2335      case 0x80000004:
2336         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2337         break;
2338      case 0x80000005:
2339         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2340         break;
2341      case 0x80000006:
2342         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2343         break;
2344      case 0x80000007:
2345         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2346         break;
2347      case 0x80000008:
2348         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2349         break;
2350      default:
2351         goto unhandled_eax_value;
2352   }
2353#  undef SET_ABCD
2354}
2355
2356
2357/* CALLED FROM GENERATED CODE */
2358/* DIRTY HELPER (non-referentially-transparent) */
2359/* Horrible hack.  On non-x86 platforms, return 0. */
2360UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
2361{
2362#  if defined(__i386__)
2363   UInt r = 0;
2364   portno &= 0xFFFF;
2365   switch (sz) {
2366      case 4:
2367         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
2368                              : "=a" (r) : "Nd" (portno));
2369         break;
2370      case 2:
2371         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
2372                              : "=a" (r) : "Nd" (portno));
2373         break;
2374      case 1:
2375         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
2376                              : "=a" (r) : "Nd" (portno));
2377         break;
2378      default:
2379         break;
2380   }
2381   return r;
2382#  else
2383   return 0;
2384#  endif
2385}
2386
2387
2388/* CALLED FROM GENERATED CODE */
2389/* DIRTY HELPER (non-referentially-transparent) */
2390/* Horrible hack.  On non-x86 platforms, do nothing. */
2391void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
2392{
2393#  if defined(__i386__)
2394   portno &= 0xFFFF;
2395   switch (sz) {
2396      case 4:
2397         __asm__ __volatile__("outl %0, %w1"
2398                              : : "a" (data), "Nd" (portno));
2399         break;
2400      case 2:
2401         __asm__ __volatile__("outw %w0, %w1"
2402                              : : "a" (data), "Nd" (portno));
2403         break;
2404      case 1:
2405         __asm__ __volatile__("outb %b0, %w1"
2406                              : : "a" (data), "Nd" (portno));
2407         break;
2408      default:
2409         break;
2410   }
2411#  else
2412   /* do nothing */
2413#  endif
2414}
2415
2416/* CALLED FROM GENERATED CODE */
2417/* DIRTY HELPER (non-referentially-transparent) */
2418/* Horrible hack.  On non-x86 platforms, just write zeroes. */
2419/* op = 0: call the native SGDT instruction.
2420   op = 1: call the native SIDT instruction.
2421*/
2422void x86g_dirtyhelper_SxDT ( void* address, UInt op )
{
2423#  if defined(__i386__)
2424   switch (op) {
2425      case 0:
2426         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2427         break;
2428      case 1:
2429         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2430         break;
2431      default:
2432         vpanic("x86g_dirtyhelper_SxDT");
2433   }
2434#  else
2435   /* not an x86 host: write a zeroed 6-byte result instead */
2436   UChar* p = (UChar*)address;
2437   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2438#  endif
2439}
2440
2441/*---------------------------------------------------------------*/
2442/*--- Helpers for MMX/SSE/SSE2.                               ---*/
2443/*---------------------------------------------------------------*/
2444
2445static inline UChar abdU8 ( UChar xx, UChar yy ) {
2446   return toUChar(xx>yy ? xx-yy : yy-xx);
2447}
2448
2449static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2450   return (((ULong)w1) << 32) | ((ULong)w0);
2451}
2452
2453static inline UShort sel16x4_3 ( ULong w64 ) {
2454   UInt hi32 = toUInt(w64 >> 32);
2455   return toUShort(hi32 >> 16);
2456}
2457static inline UShort sel16x4_2 ( ULong w64 ) {
2458   UInt hi32 = toUInt(w64 >> 32);
2459   return toUShort(hi32);
2460}
2461static inline UShort sel16x4_1 ( ULong w64 ) {
2462   UInt lo32 = toUInt(w64);
2463   return toUShort(lo32 >> 16);
2464}
2465static inline UShort sel16x4_0 ( ULong w64 ) {
2466   UInt lo32 = toUInt(w64);
2467   return toUShort(lo32);
2468}
2469
2470static inline UChar sel8x8_7 ( ULong w64 ) {
2471   UInt hi32 = toUInt(w64 >> 32);
2472   return toUChar(hi32 >> 24);
2473}
2474static inline UChar sel8x8_6 ( ULong w64 ) {
2475   UInt hi32 = toUInt(w64 >> 32);
2476   return toUChar(hi32 >> 16);
2477}
2478static inline UChar sel8x8_5 ( ULong w64 ) {
2479   UInt hi32 = toUInt(w64 >> 32);
2480   return toUChar(hi32 >> 8);
2481}
2482static inline UChar sel8x8_4 ( ULong w64 ) {
2483   UInt hi32 = toUInt(w64 >> 32);
2484   return toUChar(hi32 >> 0);
2485}
2486static inline UChar sel8x8_3 ( ULong w64 ) {
2487   UInt lo32 = toUInt(w64);
2488   return toUChar(lo32 >> 24);
2489}
2490static inline UChar sel8x8_2 ( ULong w64 ) {
2491   UInt lo32 = toUInt(w64);
2492   return toUChar(lo32 >> 16);
2493}
2494static inline UChar sel8x8_1 ( ULong w64 ) {
2495   UInt lo32 = toUInt(w64);
2496   return toUChar(lo32 >> 8);
2497}
2498static inline UChar sel8x8_0 ( ULong w64 ) {
2499   UInt lo32 = toUInt(w64);
2500   return toUChar(lo32 >> 0);
2501}
2502
2503/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2504ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2505{
2506   return
2507      mk32x2(
2508         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2509            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2510         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2511            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2512      );
2513}
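
/* Worked example (illustrative): with every 16-bit lane of xx equal
   to 2 and every lane of yy equal to 3,

      x86g_calculate_mmx_pmaddwd ( 0x0002000200020002ULL,
                                   0x0003000300030003ULL )

   multiplies each lane pair (2*3 == 6) and adds adjacent products,
   giving 12 in each 32-bit half: 0x0000000C0000000CULL. */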
2514
2515/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2516UInt x86g_calculate_mmx_pmovmskb ( ULong xx )
2517{
2518   UInt r = 0;
2519   if (xx & (1ULL << (64-1))) r |= (1<<7);
2520   if (xx & (1ULL << (56-1))) r |= (1<<6);
2521   if (xx & (1ULL << (48-1))) r |= (1<<5);
2522   if (xx & (1ULL << (40-1))) r |= (1<<4);
2523   if (xx & (1ULL << (32-1))) r |= (1<<3);
2524   if (xx & (1ULL << (24-1))) r |= (1<<2);
2525   if (xx & (1ULL << (16-1))) r |= (1<<1);
2526   if (xx & (1ULL << ( 8-1))) r |= (1<<0);
2527   return r;
2528}
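
/* Worked example (illustrative): in 0x8000000000000080ULL only bytes
   7 and 0 have their top bit set, so x86g_calculate_mmx_pmovmskb
   returns 0x81. */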
2529
2530/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2531ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2532{
2533   UInt t = 0;
2534   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2535   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2536   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2537   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2538   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2539   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2540   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2541   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2542   t &= 0xFFFF;
2543   return (ULong)t;
2544}
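
/* Worked example (illustrative): with xx == 0x0101010101010101ULL and
   yy == 0, every byte pair differs by 1, so
   x86g_calculate_mmx_psadbw(xx, yy) returns 8 -- the sum of the eight
   absolute differences, zero-extended to 64 bits. */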
2545
2546/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2547UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
2548{
2549   UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi );
2550   UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo );
2551   return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
2552}
2553
2554
2555/*---------------------------------------------------------------*/
2556/*--- Helpers for dealing with segment overrides.             ---*/
2557/*---------------------------------------------------------------*/
2558
2559static inline
2560UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2561{
2562   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2563   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2564   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2565   return (hi << 24) | (mid << 16) | lo;
2566}
2567
2568static inline
2569UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2570{
2571   UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2572   UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2573   UInt limit = (hi << 16) | lo;
2574   if (ent->LdtEnt.Bits.Granularity)
2575      limit = (limit << 12) | 0xFFF;
2576   return limit;
2577}
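
/* Worked example (illustrative): a flat 4GB data segment has all
   Base fields zero, LimitLow == 0xFFFF, LimitHi == 0xF and
   Granularity == 1, so get_segdescr_base gives 0 and
   get_segdescr_limit gives (0xFFFFF << 12) | 0xFFF == 0xFFFFFFFF. */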
2578
2579/* CALLED FROM GENERATED CODE: CLEAN HELPER */
2580ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2581                              UInt seg_selector, UInt virtual_addr )
2582{
2583   UInt tiBit, base, limit;
2584   VexGuestX86SegDescr* the_descrs;
2585
2586   Bool verboze = False;
2587
2588   /* If this isn't true, we're in Big Trouble. */
2589   vassert(8 == sizeof(VexGuestX86SegDescr));
2590
2591   if (verboze)
2592      vex_printf("x86g_use_seg_selector: "
2593                 "seg_selector = 0x%x, vaddr = 0x%x\n",
2594                 seg_selector, virtual_addr);
2595
2596   /* Check for wildly invalid selector. */
2597   if (seg_selector & ~0xFFFF)
2598      goto bad;
2599
2600   seg_selector &= 0x0000FFFF;
2601
2602   /* Sanity check the segment selector.  Ensure that RPL=11b (least
2603      privilege).  This forms the bottom 2 bits of the selector. */
2604   if ((seg_selector & 3) != 3)
2605      goto bad;
2606
2607   /* Extract the TI bit (0 means GDT, 1 means LDT) */
2608   tiBit = (seg_selector >> 2) & 1;
2609
2610   /* Convert the segment selector into a table index */
2611   seg_selector >>= 3;
2612   vassert(seg_selector < 8192);
2613
2614   if (tiBit == 0) {
2615
2616      /* GDT access. */
2617      /* Do we actually have a GDT to look at? */
2618      if (gdt == 0)
2619         goto bad;
2620
2621      /* Check for access to non-existent entry. */
2622      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2623         goto bad;
2624
2625      the_descrs = (VexGuestX86SegDescr*)gdt;
2626      base  = get_segdescr_base (&the_descrs[seg_selector]);
2627      limit = get_segdescr_limit(&the_descrs[seg_selector]);
2628
2629   } else {
2630
2631      /* All the same stuff, except for the LDT. */
2632      if (ldt == 0)
2633         goto bad;
2634
2635      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2636         goto bad;
2637
2638      the_descrs = (VexGuestX86SegDescr*)ldt;
2639      base  = get_segdescr_base (&the_descrs[seg_selector]);
2640      limit = get_segdescr_limit(&the_descrs[seg_selector]);
2641
2642   }
2643
2644   /* Do the limit check.  Note, this check is just slightly too
2645      slack.  Really it should be "if (virtual_addr + size - 1 >=
2646      limit)," but we don't have the size info to hand.  Getting it
2647      could be significantly complex.  */
2648   if (virtual_addr >= limit)
2649      goto bad;
2650
2651   if (verboze)
2652      vex_printf("x86g_use_seg_selector: "
2653                 "base = 0x%x, addr = 0x%x\n",
2654                 base, base + virtual_addr);
2655
2656   /* High 32 bits are zero, indicating success. */
2657   return (ULong)( ((UInt)virtual_addr) + base );
2658
2659 bad:
2660   return 1ULL << 32;
2661}
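
/* Usage sketch (illustrative; 'ldt', 'gdt', 'sel' and 'vaddr' stand
   for values the caller already has):

      ULong r = x86g_use_seg_selector ( ldt, gdt, sel, vaddr );
      if (r >> 32) {
         ... invalid selector, or vaddr exceeded the limit ...
      } else {
         UInt linear = (UInt)r;   ... ie base + vaddr ...
      }
*/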
2662
2663
2664/*---------------------------------------------------------------*/
2665/*--- Helpers for dealing with, and describing,               ---*/
2666/*--- guest state as a whole.                                 ---*/
2667/*---------------------------------------------------------------*/
2668
2669/* Initialise the entire x86 guest state. */
2670/* VISIBLE TO LIBVEX CLIENT */
2671void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2672{
2673   vex_state->host_EvC_FAILADDR = 0;
2674   vex_state->host_EvC_COUNTER = 0;
2675
2676   vex_state->guest_EAX = 0;
2677   vex_state->guest_ECX = 0;
2678   vex_state->guest_EDX = 0;
2679   vex_state->guest_EBX = 0;
2680   vex_state->guest_ESP = 0;
2681   vex_state->guest_EBP = 0;
2682   vex_state->guest_ESI = 0;
2683   vex_state->guest_EDI = 0;
2684
2685   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
2686   vex_state->guest_CC_DEP1 = 0;
2687   vex_state->guest_CC_DEP2 = 0;
2688   vex_state->guest_CC_NDEP = 0;
2689   vex_state->guest_DFLAG   = 1; /* forwards */
2690   vex_state->guest_IDFLAG  = 0;
2691   vex_state->guest_ACFLAG  = 0;
2692
2693   vex_state->guest_EIP = 0;
2694
2695   /* Initialise the simulated FPU */
2696   x86g_dirtyhelper_FINIT( vex_state );
2697
2698   /* Initialise the SSE state. */
2699#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2700
2701   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2702   SSEZERO(vex_state->guest_XMM0);
2703   SSEZERO(vex_state->guest_XMM1);
2704   SSEZERO(vex_state->guest_XMM2);
2705   SSEZERO(vex_state->guest_XMM3);
2706   SSEZERO(vex_state->guest_XMM4);
2707   SSEZERO(vex_state->guest_XMM5);
2708   SSEZERO(vex_state->guest_XMM6);
2709   SSEZERO(vex_state->guest_XMM7);
2710
2711#  undef SSEZERO
2712
2713   vex_state->guest_CS  = 0;
2714   vex_state->guest_DS  = 0;
2715   vex_state->guest_ES  = 0;
2716   vex_state->guest_FS  = 0;
2717   vex_state->guest_GS  = 0;
2718   vex_state->guest_SS  = 0;
2719   vex_state->guest_LDT = 0;
2720   vex_state->guest_GDT = 0;
2721
2722   vex_state->guest_EMWARN = EmWarn_NONE;
2723
2724   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2725   vex_state->guest_TISTART = 0;
2726   vex_state->guest_TILEN   = 0;
2727
2728   vex_state->guest_NRADDR   = 0;
2729   vex_state->guest_SC_CLASS = 0;
2730   vex_state->guest_IP_AT_SYSCALL = 0;
2731
2732   Int i;
2733   for (i = 0; i < sizeof(vex_state->padding)
2734                   / sizeof(vex_state->padding[0]); i++) {
2735      vex_state->padding[i] = 0;
2736   }
2737}
2738
2739
2740/* Figure out if any part of the guest state contained in minoff
2741   .. maxoff requires precise memory exceptions.  If in doubt return
2742   True (but this generates significantly slower code).
2743
2744   By default we enforce precise exns for guest %ESP, %EBP and %EIP
2745   only.  These are the minimum needed to extract correct stack
2746   backtraces from x86 code.
2747*/
2748Bool guest_x86_state_requires_precise_mem_exns ( Int minoff,
2749                                                 Int maxoff)
2750{
2751   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2752   Int ebp_max = ebp_min + 4 - 1;
2753   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2754   Int esp_max = esp_min + 4 - 1;
2755   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2756   Int eip_max = eip_min + 4 - 1;
2757
2758   if (maxoff < ebp_min || minoff > ebp_max) {
2759      /* no overlap with ebp */
2760   } else {
2761      return True;
2762   }
2763
2764   if (maxoff < esp_min || minoff > esp_max) {
2765      /* no overlap with esp */
2766   } else {
2767      return True;
2768   }
2769
2770   if (maxoff < eip_min || minoff > eip_max) {
2771      /* no overlap with eip */
2772   } else {
2773      return True;
2774   }
2775
2776   return False;
2777}
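
/* Example (illustrative): a write covering only guest_EAX misses all
   three 4-byte intervals above, so the function returns False; any
   range overlapping even one byte of guest_ESP, guest_EBP or
   guest_EIP returns True. */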
2778
2779
2780#define ALWAYSDEFD(field)                           \
2781    { offsetof(VexGuestX86State, field),            \
2782      (sizeof ((VexGuestX86State*)0)->field) }
2783
2784VexGuestLayout
2785   x86guest_layout
2786      = {
2787          /* Total size of the guest state, in bytes. */
2788          .total_sizeB = sizeof(VexGuestX86State),
2789
2790          /* Describe the stack pointer. */
2791          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
2792          .sizeof_SP = 4,
2793
2794          /* Describe the frame pointer. */
2795          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
2796          .sizeof_FP = 4,
2797
2798          /* Describe the instruction pointer. */
2799          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
2800          .sizeof_IP = 4,
2801
2802          /* Describe any sections to be regarded by Memcheck as
2803             'always-defined'. */
2804          .n_alwaysDefd = 24,
2805
2806          /* flags thunk: OP and NDEP are always defd, whereas DEP1
2807             and DEP2 have to be tracked.  See detailed comment in
2808             gdefs.h on meaning of thunk fields. */
2809          .alwaysDefd
2810             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
2811                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
2812                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
2813                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
2814                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
2815                 /*  5 */ ALWAYSDEFD(guest_EIP),
2816                 /*  6 */ ALWAYSDEFD(guest_FTOP),
2817                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
2818                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
2819                 /*  9 */ ALWAYSDEFD(guest_FC3210),
2820                 /* 10 */ ALWAYSDEFD(guest_CS),
2821                 /* 11 */ ALWAYSDEFD(guest_DS),
2822                 /* 12 */ ALWAYSDEFD(guest_ES),
2823                 /* 13 */ ALWAYSDEFD(guest_FS),
2824                 /* 14 */ ALWAYSDEFD(guest_GS),
2825                 /* 15 */ ALWAYSDEFD(guest_SS),
2826                 /* 16 */ ALWAYSDEFD(guest_LDT),
2827                 /* 17 */ ALWAYSDEFD(guest_GDT),
2828                 /* 18 */ ALWAYSDEFD(guest_EMWARN),
2829                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
2830                 /* 20 */ ALWAYSDEFD(guest_TISTART),
2831                 /* 21 */ ALWAYSDEFD(guest_TILEN),
2832                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
2833                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
2834               }
2835        };
2836
2837
2838/*---------------------------------------------------------------*/
2839/*--- end                                 guest_x86_helpers.c ---*/
2840/*---------------------------------------------------------------*/
2841