1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_x86_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2011 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39
40#include "ir_match.h"
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"
45#include "host_generic_simd128.h"
46#include "host_x86_defs.h"
47
48/* TODO 21 Apr 2005:
49
50   -- (Really an assembler issue) don't emit CMov32 as a cmov
51      insn, since that's expensive on P4 and conditional branch
52      is cheaper if (as we expect) the condition is highly predictable
53
54   -- preserve xmm registers across function calls (by declaring them
55      as trashed by call insns)
56
57   -- preserve x87 ST stack discipline across function calls.  Sigh.
58
59   -- Check doHelperCall: if a call is conditional, we cannot safely
60      compute any regparm args directly to registers.  Hence, the
61      fast-regparm marshalling should be restricted to unconditional
62      calls only.
63*/
64
65/*---------------------------------------------------------*/
66/*--- x87 control word stuff                            ---*/
67/*---------------------------------------------------------*/
68
69/* Vex-generated code expects to run with the FPU set as follows: all
70   exceptions masked, round-to-nearest, precision = 53 bits.  This
71   corresponds to a FPU control word value of 0x027F.
72
73   Similarly the SSE control word (%mxcsr) should be 0x1F80.
74
75   %fpucw and %mxcsr should have these values on entry to
76   Vex-generated code, and should those values should be
77   unchanged at exit.
78*/
79
80#define DEFAULT_FPUCW 0x027F
81
82/* debugging only, do not use */
83/* define DEFAULT_FPUCW 0x037F */
84
85
86/*---------------------------------------------------------*/
87/*--- misc helpers                                      ---*/
88/*---------------------------------------------------------*/
89
90/* These are duplicated in guest-x86/toIR.c */
91static IRExpr* unop ( IROp op, IRExpr* a )
92{
93   return IRExpr_Unop(op, a);
94}
95
96static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97{
98   return IRExpr_Binop(op, a1, a2);
99}
100
101static IRExpr* bind ( Int binder )
102{
103   return IRExpr_Binder(binder);
104}
105
106static Bool isZeroU8 ( IRExpr* e )
107{
108   return e->tag == Iex_Const
109          && e->Iex.Const.con->tag == Ico_U8
110          && e->Iex.Const.con->Ico.U8 == 0;
111}
112
113static Bool isZeroU32 ( IRExpr* e )
114{
115   return e->tag == Iex_Const
116          && e->Iex.Const.con->tag == Ico_U32
117          && e->Iex.Const.con->Ico.U32 == 0;
118}
119
120static Bool isZeroU64 ( IRExpr* e )
121{
122   return e->tag == Iex_Const
123          && e->Iex.Const.con->tag == Ico_U64
124          && e->Iex.Const.con->Ico.U64 == 0ULL;
125}
126
127
128/*---------------------------------------------------------*/
129/*--- ISelEnv                                           ---*/
130/*---------------------------------------------------------*/
131
132/* This carries around:
133
134   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
135     might encounter.  This is computed before insn selection starts,
136     and does not change.
137
138   - A mapping from IRTemp to HReg.  This tells the insn selector
139     which virtual register(s) are associated with each IRTemp
140     temporary.  This is computed before insn selection starts, and
141     does not change.  We expect this mapping to map precisely the
142     same set of IRTemps as the type mapping does.
143
144        - vregmap   holds the primary register for the IRTemp.
145        - vregmapHI is only used for 64-bit integer-typed
146             IRTemps.  It holds the identity of a second
147             32-bit virtual HReg, which holds the high half
148             of the value.
149
150   - The code array, that is, the insns selected so far.
151
152   - A counter, for generating new virtual registers.
153
154   - The host subarchitecture we are selecting insns for.
155     This is set at the start and does not change.
156
157   Note, this is all host-independent.  */
158
159typedef
160   struct {
161      IRTypeEnv*   type_env;
162
163      HReg*        vregmap;
164      HReg*        vregmapHI;
165      Int          n_vregmap;
166
167      HInstrArray* code;
168
169      Int          vreg_ctr;
170
171      UInt         hwcaps;
172   }
173   ISelEnv;
174
175
176static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
177{
178   vassert(tmp >= 0);
179   vassert(tmp < env->n_vregmap);
180   return env->vregmap[tmp];
181}
182
183static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
184{
185   vassert(tmp >= 0);
186   vassert(tmp < env->n_vregmap);
187   vassert(env->vregmapHI[tmp] != INVALID_HREG);
188   *vrLO = env->vregmap[tmp];
189   *vrHI = env->vregmapHI[tmp];
190}
191
192static void addInstr ( ISelEnv* env, X86Instr* instr )
193{
194   addHInstr(env->code, instr);
195   if (vex_traceflags & VEX_TRACE_VCODE) {
196      ppX86Instr(instr, False);
197      vex_printf("\n");
198   }
199}
200
201static HReg newVRegI ( ISelEnv* env )
202{
203   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
204   env->vreg_ctr++;
205   return reg;
206}
207
208static HReg newVRegF ( ISelEnv* env )
209{
210   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
211   env->vreg_ctr++;
212   return reg;
213}
214
215static HReg newVRegV ( ISelEnv* env )
216{
217   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
218   env->vreg_ctr++;
219   return reg;
220}
221
222
223/*---------------------------------------------------------*/
224/*--- ISEL: Forward declarations                        ---*/
225/*---------------------------------------------------------*/
226
227/* These are organised as iselXXX and iselXXX_wrk pairs.  The
228   iselXXX_wrk do the real work, but are not to be called directly.
229   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
230   checks that all returned registers are virtual.  You should not
231   call the _wrk version directly.
232*/
233static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
234static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );
235
236static X86RI*      iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
237static X86RI*      iselIntExpr_RI     ( ISelEnv* env, IRExpr* e );
238
239static X86RM*      iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
240static X86RM*      iselIntExpr_RM     ( ISelEnv* env, IRExpr* e );
241
242static HReg        iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
243static HReg        iselIntExpr_R     ( ISelEnv* env, IRExpr* e );
244
245static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
246static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );
247
248static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
249                                       ISelEnv* env, IRExpr* e );
250static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
251                                       ISelEnv* env, IRExpr* e );
252
253static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
254static X86CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );
255
256static HReg        iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
257static HReg        iselDblExpr     ( ISelEnv* env, IRExpr* e );
258
259static HReg        iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
260static HReg        iselFltExpr     ( ISelEnv* env, IRExpr* e );
261
262static HReg        iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
263static HReg        iselVecExpr     ( ISelEnv* env, IRExpr* e );
264
265
266/*---------------------------------------------------------*/
267/*--- ISEL: Misc helpers                                ---*/
268/*---------------------------------------------------------*/
269
270/* Make a int reg-reg move. */
271
272static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
273{
274   vassert(hregClass(src) == HRcInt32);
275   vassert(hregClass(dst) == HRcInt32);
276   return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
277}
278
279
280/* Make a vector reg-reg move. */
281
282static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
283{
284   vassert(hregClass(src) == HRcVec128);
285   vassert(hregClass(dst) == HRcVec128);
286   return X86Instr_SseReRg(Xsse_MOV, src, dst);
287}
288
289/* Advance/retreat %esp by n. */
290
291static void add_to_esp ( ISelEnv* env, Int n )
292{
293   vassert(n > 0 && n < 256 && (n%4) == 0);
294   addInstr(env,
295            X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
296}
297
298static void sub_from_esp ( ISelEnv* env, Int n )
299{
300   vassert(n > 0 && n < 256 && (n%4) == 0);
301   addInstr(env,
302            X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
303}
304
305
306/* Given an amode, return one which references 4 bytes further
307   along. */
308
309static X86AMode* advance4 ( X86AMode* am )
310{
311   X86AMode* am4 = dopyX86AMode(am);
312   switch (am4->tag) {
313      case Xam_IRRS:
314         am4->Xam.IRRS.imm += 4; break;
315      case Xam_IR:
316         am4->Xam.IR.imm += 4; break;
317      default:
318         vpanic("advance4(x86,host)");
319   }
320   return am4;
321}
322
323
324/* Push an arg onto the host stack, in preparation for a call to a
325   helper function of some kind.  Returns the number of 32-bit words
326   pushed. */
327
328static Int pushArg ( ISelEnv* env, IRExpr* arg )
329{
330   IRType arg_ty = typeOfIRExpr(env->type_env, arg);
331   if (arg_ty == Ity_I32) {
332      addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
333      return 1;
334   } else
335   if (arg_ty == Ity_I64) {
336      HReg rHi, rLo;
337      iselInt64Expr(&rHi, &rLo, env, arg);
338      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
339      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
340      return 2;
341   }
342   ppIRExpr(arg);
343   vpanic("pushArg(x86): can't handle arg of this type");
344}
345
346
347/* Complete the call to a helper function, by calling the
348   helper and clearing the args off the stack. */
349
350static
351void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
352                              IRCallee* cee, Int n_arg_ws )
353{
354   /* Complication.  Need to decide which reg to use as the fn address
355      pointer, in a way that doesn't trash regparm-passed
356      parameters. */
357   vassert(sizeof(void*) == 4);
358
359   addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
360                                    cee->regparms));
361   if (n_arg_ws > 0)
362      add_to_esp(env, 4*n_arg_ws);
363}
364
365
366/* Used only in doHelperCall.  See big comment in doHelperCall re
367   handling of regparm args.  This function figures out whether
368   evaluation of an expression might require use of a fixed register.
369   If in doubt return True (safe but suboptimal).
370*/
371static
372Bool mightRequireFixedRegs ( IRExpr* e )
373{
374   switch (e->tag) {
375      case Iex_RdTmp: case Iex_Const: case Iex_Get:
376         return False;
377      default:
378         return True;
379   }
380}
381
382
383/* Do a complete function call.  guard is a Ity_Bit expression
384   indicating whether or not the call happens.  If guard==NULL, the
385   call is unconditional. */
386
387static
388void doHelperCall ( ISelEnv* env,
389                    Bool passBBP,
390                    IRExpr* guard, IRCallee* cee, IRExpr** args )
391{
392   X86CondCode cc;
393   HReg        argregs[3];
394   HReg        tmpregs[3];
395   Bool        danger;
396   Int         not_done_yet, n_args, n_arg_ws, stack_limit,
397               i, argreg, argregX;
398
399   /* Marshal args for a call, do the call, and clear the stack.
400      Complexities to consider:
401
402      * if passBBP is True, %ebp (the baseblock pointer) is to be
403        passed as the first arg.
404
405      * If the callee claims regparmness of 1, 2 or 3, we must pass the
406        first 1, 2 or 3 args in registers (EAX, EDX, and ECX
407        respectively).  To keep things relatively simple, only args of
408        type I32 may be passed as regparms -- just bomb out if anything
409        else turns up.  Clearly this depends on the front ends not
410        trying to pass any other types as regparms.
411   */
412
413   /* 16 Nov 2004: the regparm handling is complicated by the
414      following problem.
415
416      Consider a call two a function with two regparm parameters:
417      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
418      Suppose code is first generated to compute e1 into %eax.  Then,
419      code is generated to compute e2 into %edx.  Unfortunately, if
420      the latter code sequence uses %eax, it will trash the value of
421      e1 computed by the former sequence.  This could happen if (for
422      example) e2 itself involved a function call.  In the code below,
423      args are evaluated right-to-left, not left-to-right, but the
424      principle and the problem are the same.
425
426      One solution is to compute all regparm-bound args into vregs
427      first, and once they are all done, move them to the relevant
428      real regs.  This always gives correct code, but it also gives
429      a bunch of vreg-to-rreg moves which are usually redundant but
430      are hard for the register allocator to get rid of.
431
432      A compromise is to first examine all regparm'd argument
433      expressions.  If they are all so simple that it is clear
434      they will be evaluated without use of any fixed registers,
435      use the old compute-directly-to-fixed-target scheme.  If not,
436      be safe and use the via-vregs scheme.
437
438      Note this requires being able to examine an expression and
439      determine whether or not evaluation of it might use a fixed
440      register.  That requires knowledge of how the rest of this
441      insn selector works.  Currently just the following 3 are
442      regarded as safe -- hopefully they cover the majority of
443      arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
444   */
445   vassert(cee->regparms >= 0 && cee->regparms <= 3);
446
447   n_args = n_arg_ws = 0;
448   while (args[n_args]) n_args++;
449
450   not_done_yet = n_args;
451   if (passBBP)
452      not_done_yet++;
453
454   stack_limit = cee->regparms;
455   if (cee->regparms > 0 && passBBP) stack_limit--;
456
457   /* ------ BEGIN marshall all arguments ------ */
458
459   /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
460   for (i = n_args-1; i >= stack_limit; i--) {
461      n_arg_ws += pushArg(env, args[i]);
462      not_done_yet--;
463   }
464
465   /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
466      registers. */
467
468   if (cee->regparms > 0) {
469
470      /* ------ BEGIN deal with regparms ------ */
471
472      /* deal with regparms, not forgetting %ebp if needed. */
473      argregs[0] = hregX86_EAX();
474      argregs[1] = hregX86_EDX();
475      argregs[2] = hregX86_ECX();
476      tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
477
478      argreg = cee->regparms;
479
480      /* In keeping with big comment above, detect potential danger
481         and use the via-vregs scheme if needed. */
482      danger = False;
483      for (i = stack_limit-1; i >= 0; i--) {
484         if (mightRequireFixedRegs(args[i])) {
485            danger = True;
486            break;
487         }
488      }
489
490      if (danger) {
491
492         /* Move via temporaries */
493         argregX = argreg;
494         for (i = stack_limit-1; i >= 0; i--) {
495
496            if (0) {
497               vex_printf("x86 host: register param is complex: ");
498               ppIRExpr(args[i]);
499               vex_printf("\n");
500            }
501
502            argreg--;
503            vassert(argreg >= 0);
504            vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
505            tmpregs[argreg] = iselIntExpr_R(env, args[i]);
506            not_done_yet--;
507         }
508         for (i = stack_limit-1; i >= 0; i--) {
509            argregX--;
510            vassert(argregX >= 0);
511            addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
512         }
513
514      } else {
515         /* It's safe to compute all regparm args directly into their
516            target registers. */
517         for (i = stack_limit-1; i >= 0; i--) {
518            argreg--;
519            vassert(argreg >= 0);
520            vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
521            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
522                                          iselIntExpr_RMI(env, args[i]),
523                                          argregs[argreg]));
524            not_done_yet--;
525         }
526
527      }
528
529      /* Not forgetting %ebp if needed. */
530      if (passBBP) {
531         vassert(argreg == 1);
532         addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
533         not_done_yet--;
534      }
535
536      /* ------ END deal with regparms ------ */
537
538   } else {
539
540      /* No regparms.  Heave %ebp on the stack if needed. */
541      if (passBBP) {
542         addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
543         n_arg_ws++;
544         not_done_yet--;
545      }
546
547   }
548
549   vassert(not_done_yet == 0);
550
551   /* ------ END marshall all arguments ------ */
552
553   /* Now we can compute the condition.  We can't do it earlier
554      because the argument computations could trash the condition
555      codes.  Be a bit clever to handle the common case where the
556      guard is 1:Bit. */
557   cc = Xcc_ALWAYS;
558   if (guard) {
559      if (guard->tag == Iex_Const
560          && guard->Iex.Const.con->tag == Ico_U1
561          && guard->Iex.Const.con->Ico.U1 == True) {
562         /* unconditional -- do nothing */
563      } else {
564         cc = iselCondCode( env, guard );
565      }
566   }
567
568   /* call the helper, and get the args off the stack afterwards. */
569   callHelperAndClearArgs( env, cc, cee, n_arg_ws );
570}
571
572
573/* Given a guest-state array descriptor, an index expression and a
574   bias, generate an X86AMode holding the relevant guest state
575   offset. */
576
577static
578X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
579                                IRExpr* off, Int bias )
580{
581   HReg tmp, roff;
582   Int  elemSz = sizeofIRType(descr->elemTy);
583   Int  nElems = descr->nElems;
584   Int  shift  = 0;
585
586   /* throw out any cases not generated by an x86 front end.  In
587      theory there might be a day where we need to handle them -- if
588      we ever run non-x86-guest on x86 host. */
589
590   if (nElems != 8)
591      vpanic("genGuestArrayOffset(x86 host)(1)");
592
593   switch (elemSz) {
594      case 1:  shift = 0; break;
595      case 4:  shift = 2; break;
596      case 8:  shift = 3; break;
597      default: vpanic("genGuestArrayOffset(x86 host)(2)");
598   }
599
600   /* Compute off into a reg, %off.  Then return:
601
602         movl %off, %tmp
603         addl $bias, %tmp  (if bias != 0)
604         andl %tmp, 7
605         ... base(%ebp, %tmp, shift) ...
606   */
607   tmp  = newVRegI(env);
608   roff = iselIntExpr_R(env, off);
609   addInstr(env, mk_iMOVsd_RR(roff, tmp));
610   if (bias != 0) {
611      addInstr(env,
612               X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
613   }
614   addInstr(env,
615            X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
616   return
617      X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
618}
619
620
621/* Mess with the FPU's rounding mode: set to the default rounding mode
622   (DEFAULT_FPUCW). */
623static
624void set_FPU_rounding_default ( ISelEnv* env )
625{
626   /* pushl $DEFAULT_FPUCW
627      fldcw 0(%esp)
628      addl $4, %esp
629   */
630   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
631   addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
632   addInstr(env, X86Instr_FpLdCW(zero_esp));
633   add_to_esp(env, 4);
634}
635
636
637/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
638   expression denoting a value in the range 0 .. 3, indicating a round
639   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
640   the same rounding.
641*/
642static
643void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
644{
645   HReg rrm  = iselIntExpr_R(env, mode);
646   HReg rrm2 = newVRegI(env);
647   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
648
649   /* movl  %rrm, %rrm2
650      andl  $3, %rrm2   -- shouldn't be needed; paranoia
651      shll  $10, %rrm2
652      orl   $DEFAULT_FPUCW, %rrm2
653      pushl %rrm2
654      fldcw 0(%esp)
655      addl  $4, %esp
656   */
657   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
658   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
659   addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
660   addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
661   addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
662   addInstr(env, X86Instr_FpLdCW(zero_esp));
663   add_to_esp(env, 4);
664}
665
666
667/* Generate !src into a new vector register, and be sure that the code
668   is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
669   way to do this.
670*/
671static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
672{
673   HReg dst = newVRegV(env);
674   /* Set dst to zero.  If dst contains a NaN then all hell might
675      break loose after the comparison.  So, first zero it. */
676   addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
677   /* And now make it all 1s ... */
678   addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
679   /* Finally, xor 'src' into it. */
680   addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
681   /* Doesn't that just totally suck? */
682   return dst;
683}
684
685
686/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
687   after most non-simple FPU operations (simple = +, -, *, / and
688   sqrt).
689
690   This could be done a lot more efficiently if needed, by loading
691   zero and adding it to the value to be rounded (fldz ; faddp?).
692*/
693static void roundToF64 ( ISelEnv* env, HReg reg )
694{
695   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
696   sub_from_esp(env, 8);
697   addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
698   addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
699   add_to_esp(env, 8);
700}
701
702
703/*---------------------------------------------------------*/
704/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
705/*---------------------------------------------------------*/
706
707/* Select insns for an integer-typed expression, and add them to the
708   code list.  Return a reg holding the result.  This reg will be a
709   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
710   want to modify it, ask for a new vreg, copy it in there, and modify
711   the copy.  The register allocator will do its best to map both
712   vregs to the same real register, so the copies will often disappear
713   later in the game.
714
715   This should handle expressions of 32, 16 and 8-bit type.  All
716   results are returned in a 32-bit register.  For 16- and 8-bit
717   expressions, the upper 16/24 bits are arbitrary, so you should mask
718   or sign extend partial values if necessary.
719*/
720
721static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
722{
723   HReg r = iselIntExpr_R_wrk(env, e);
724   /* sanity checks ... */
725#  if 0
726   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
727#  endif
728   vassert(hregClass(r) == HRcInt32);
729   vassert(hregIsVirtual(r));
730   return r;
731}
732
733/* DO NOT CALL THIS DIRECTLY ! */
734static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
735{
736   MatchInfo mi;
737
738   IRType ty = typeOfIRExpr(env->type_env,e);
739   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
740
741   switch (e->tag) {
742
743   /* --------- TEMP --------- */
744   case Iex_RdTmp: {
745      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
746   }
747
748   /* --------- LOAD --------- */
749   case Iex_Load: {
750      HReg dst = newVRegI(env);
751      X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
752
753      /* We can't handle big-endian loads, nor load-linked. */
754      if (e->Iex.Load.end != Iend_LE)
755         goto irreducible;
756
757      if (ty == Ity_I32) {
758         addInstr(env, X86Instr_Alu32R(Xalu_MOV,
759                                       X86RMI_Mem(amode), dst) );
760         return dst;
761      }
762      if (ty == Ity_I16) {
763         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
764         return dst;
765      }
766      if (ty == Ity_I8) {
767         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
768         return dst;
769      }
770      break;
771   }
772
773   /* --------- TERNARY OP --------- */
774   case Iex_Triop: {
775      /* C3210 flags following FPU partial remainder (fprem), both
776         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
777      if (e->Iex.Triop.op == Iop_PRemC3210F64
778          || e->Iex.Triop.op == Iop_PRem1C3210F64) {
779         HReg junk = newVRegF(env);
780         HReg dst  = newVRegI(env);
781         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
782         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
783         /* XXXROUNDINGFIXME */
784         /* set roundingmode here */
785         addInstr(env, X86Instr_FpBinary(
786                           e->Iex.Binop.op==Iop_PRemC3210F64
787                              ? Xfp_PREM : Xfp_PREM1,
788                           srcL,srcR,junk
789                 ));
790         /* The previous pseudo-insn will have left the FPU's C3210
791            flags set correctly.  So bag them. */
792         addInstr(env, X86Instr_FpStSW_AX());
793         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
794         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
795         return dst;
796      }
797
798      break;
799   }
800
801   /* --------- BINARY OP --------- */
802   case Iex_Binop: {
803      X86AluOp   aluOp;
804      X86ShiftOp shOp;
805
806      /* Pattern: Sub32(0,x) */
807      if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
808         HReg dst = newVRegI(env);
809         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
810         addInstr(env, mk_iMOVsd_RR(reg,dst));
811         addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
812         return dst;
813      }
814
815      /* Is it an addition or logical style op? */
816      switch (e->Iex.Binop.op) {
817         case Iop_Add8: case Iop_Add16: case Iop_Add32:
818            aluOp = Xalu_ADD; break;
819         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
820            aluOp = Xalu_SUB; break;
821         case Iop_And8: case Iop_And16: case Iop_And32:
822            aluOp = Xalu_AND; break;
823         case Iop_Or8: case Iop_Or16: case Iop_Or32:
824            aluOp = Xalu_OR; break;
825         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
826            aluOp = Xalu_XOR; break;
827         case Iop_Mul16: case Iop_Mul32:
828            aluOp = Xalu_MUL; break;
829         default:
830            aluOp = Xalu_INVALID; break;
831      }
832      /* For commutative ops we assume any literal
833         values are on the second operand. */
834      if (aluOp != Xalu_INVALID) {
835         HReg dst    = newVRegI(env);
836         HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
837         X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
838         addInstr(env, mk_iMOVsd_RR(reg,dst));
839         addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
840         return dst;
841      }
842      /* Could do better here; forcing the first arg into a reg
843         isn't always clever.
844         -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
845                        LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
846                        t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
847            movl 0xFFFFFFA0(%vr41),%vr107
848            movl 0xFFFFFFA4(%vr41),%vr108
849            movl %vr107,%vr106
850            xorl %vr108,%vr106
851            movl 0xFFFFFFA8(%vr41),%vr109
852            movl %vr106,%vr105
853            andl %vr109,%vr105
854            movl 0xFFFFFFA0(%vr41),%vr110
855            movl %vr105,%vr104
856            xorl %vr110,%vr104
857            movl %vr104,%vr70
858      */
859
860      /* Perhaps a shift op? */
861      switch (e->Iex.Binop.op) {
862         case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
863            shOp = Xsh_SHL; break;
864         case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
865            shOp = Xsh_SHR; break;
866         case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
867            shOp = Xsh_SAR; break;
868         default:
869            shOp = Xsh_INVALID; break;
870      }
871      if (shOp != Xsh_INVALID) {
872         HReg dst = newVRegI(env);
873
874         /* regL = the value to be shifted */
875         HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
876         addInstr(env, mk_iMOVsd_RR(regL,dst));
877
878         /* Do any necessary widening for 16/8 bit operands */
879         switch (e->Iex.Binop.op) {
880            case Iop_Shr8:
881               addInstr(env, X86Instr_Alu32R(
882                                Xalu_AND, X86RMI_Imm(0xFF), dst));
883               break;
884            case Iop_Shr16:
885               addInstr(env, X86Instr_Alu32R(
886                                Xalu_AND, X86RMI_Imm(0xFFFF), dst));
887               break;
888            case Iop_Sar8:
889               addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
890               addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
891               break;
892            case Iop_Sar16:
893               addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
894               addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
895               break;
896            default: break;
897         }
898
899         /* Now consider the shift amount.  If it's a literal, we
900            can do a much better job than the general case. */
901         if (e->Iex.Binop.arg2->tag == Iex_Const) {
902            /* assert that the IR is well-typed */
903            Int nshift;
904            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
905            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
906	    vassert(nshift >= 0);
907	    if (nshift > 0)
908               /* Can't allow nshift==0 since that means %cl */
909               addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
910         } else {
911            /* General case; we have to force the amount into %cl. */
912            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
913            addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
914            addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
915         }
916         return dst;
917      }
918
919      /* Handle misc other ops. */
920
921      if (e->Iex.Binop.op == Iop_Max32U) {
922         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
923         HReg dst  = newVRegI(env);
924         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
925         addInstr(env, mk_iMOVsd_RR(src1,dst));
926         addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
927         addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
928         return dst;
929      }
930
931      if (e->Iex.Binop.op == Iop_8HLto16) {
932         HReg hi8  = newVRegI(env);
933         HReg lo8  = newVRegI(env);
934         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
935         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
936         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
937         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
938         addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
939         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
940         addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
941         return hi8;
942      }
943
944      if (e->Iex.Binop.op == Iop_16HLto32) {
945         HReg hi16  = newVRegI(env);
946         HReg lo16  = newVRegI(env);
947         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
948         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
949         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
950         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
951         addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
952         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
953         addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
954         return hi16;
955      }
956
957      if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
958          || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
959         HReg a16   = newVRegI(env);
960         HReg b16   = newVRegI(env);
961         HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
962         HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
963         Int  shift = (e->Iex.Binop.op == Iop_MullS8
964                       || e->Iex.Binop.op == Iop_MullU8)
965                         ? 24 : 16;
966         X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
967                              || e->Iex.Binop.op == Iop_MullS16)
968                                ? Xsh_SAR : Xsh_SHR;
969
970         addInstr(env, mk_iMOVsd_RR(a16s, a16));
971         addInstr(env, mk_iMOVsd_RR(b16s, b16));
972         addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
973         addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
974         addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
975         addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
976         addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
977         return b16;
978      }
979
980      if (e->Iex.Binop.op == Iop_CmpF64) {
981         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
982         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
983         HReg dst = newVRegI(env);
984         addInstr(env, X86Instr_FpCmp(fL,fR,dst));
985         /* shift this right 8 bits so as to conform to CmpF64
986            definition. */
987         addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
988         return dst;
989      }
990
991      if (e->Iex.Binop.op == Iop_F64toI32S
992          || e->Iex.Binop.op == Iop_F64toI16S) {
993         Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
994         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
995         HReg dst = newVRegI(env);
996
997         /* Used several times ... */
998         X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
999
1000	 /* rf now holds the value to be converted, and rrm holds the
1001	    rounding mode value, encoded as per the IRRoundingMode
1002	    enum.  The first thing to do is set the FPU's rounding
1003	    mode accordingly. */
1004
1005         /* Create a space for the format conversion. */
1006         /* subl $4, %esp */
1007         sub_from_esp(env, 4);
1008
1009	 /* Set host rounding mode */
1010	 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1011
1012         /* gistw/l %rf, 0(%esp) */
1013         addInstr(env, X86Instr_FpLdStI(False/*store*/,
1014                                        toUChar(sz), rf, zero_esp));
1015
1016         if (sz == 2) {
1017            /* movzwl 0(%esp), %dst */
1018            addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1019         } else {
1020            /* movl 0(%esp), %dst */
1021            vassert(sz == 4);
1022            addInstr(env, X86Instr_Alu32R(
1023                             Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1024         }
1025
1026	 /* Restore default FPU rounding. */
1027         set_FPU_rounding_default( env );
1028
1029         /* addl $4, %esp */
1030	 add_to_esp(env, 4);
1031         return dst;
1032      }
1033
1034      break;
1035   }
1036
1037   /* --------- UNARY OP --------- */
1038   case Iex_Unop: {
1039
1040      /* 1Uto8(32to1(expr32)) */
1041      if (e->Iex.Unop.op == Iop_1Uto8) {
1042         DECLARE_PATTERN(p_32to1_then_1Uto8);
1043         DEFINE_PATTERN(p_32to1_then_1Uto8,
1044                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1045         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1046            IRExpr* expr32 = mi.bindee[0];
1047            HReg dst = newVRegI(env);
1048            HReg src = iselIntExpr_R(env, expr32);
1049            addInstr(env, mk_iMOVsd_RR(src,dst) );
1050            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1051                                          X86RMI_Imm(1), dst));
1052            return dst;
1053         }
1054      }
1055
1056      /* 8Uto32(LDle(expr32)) */
1057      if (e->Iex.Unop.op == Iop_8Uto32) {
1058         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1059         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1060                        unop(Iop_8Uto32,
1061                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1062         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1063            HReg dst = newVRegI(env);
1064            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1065            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1066            return dst;
1067         }
1068      }
1069
1070      /* 8Sto32(LDle(expr32)) */
1071      if (e->Iex.Unop.op == Iop_8Sto32) {
1072         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1073         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1074                        unop(Iop_8Sto32,
1075                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1076         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1077            HReg dst = newVRegI(env);
1078            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1079            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1080            return dst;
1081         }
1082      }
1083
1084      /* 16Uto32(LDle(expr32)) */
1085      if (e->Iex.Unop.op == Iop_16Uto32) {
1086         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1087         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1088                        unop(Iop_16Uto32,
1089                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1090         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1091            HReg dst = newVRegI(env);
1092            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1093            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1094            return dst;
1095         }
1096      }
1097
1098      /* 8Uto32(GET:I8) */
1099      if (e->Iex.Unop.op == Iop_8Uto32) {
1100         if (e->Iex.Unop.arg->tag == Iex_Get) {
1101            HReg      dst;
1102            X86AMode* amode;
1103            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1104            dst = newVRegI(env);
1105            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1106                                hregX86_EBP());
1107            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1108            return dst;
1109         }
1110      }
1111
1112      /* 16to32(GET:I16) */
1113      if (e->Iex.Unop.op == Iop_16Uto32) {
1114         if (e->Iex.Unop.arg->tag == Iex_Get) {
1115            HReg      dst;
1116            X86AMode* amode;
1117            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1118            dst = newVRegI(env);
1119            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1120                                hregX86_EBP());
1121            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1122            return dst;
1123         }
1124      }
1125
1126      switch (e->Iex.Unop.op) {
1127         case Iop_8Uto16:
1128         case Iop_8Uto32:
1129         case Iop_16Uto32: {
1130            HReg dst = newVRegI(env);
1131            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1132            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1133            addInstr(env, mk_iMOVsd_RR(src,dst) );
1134            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1135                                          X86RMI_Imm(mask), dst));
1136            return dst;
1137         }
1138         case Iop_8Sto16:
1139         case Iop_8Sto32:
1140         case Iop_16Sto32: {
1141            HReg dst = newVRegI(env);
1142            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1143            UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1144            addInstr(env, mk_iMOVsd_RR(src,dst) );
1145            addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1146            addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1147            return dst;
1148         }
1149	 case Iop_Not8:
1150	 case Iop_Not16:
1151         case Iop_Not32: {
1152            HReg dst = newVRegI(env);
1153            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1154            addInstr(env, mk_iMOVsd_RR(src,dst) );
1155            addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1156            return dst;
1157         }
1158         case Iop_64HIto32: {
1159            HReg rHi, rLo;
1160            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1161            return rHi; /* and abandon rLo .. poor wee thing :-) */
1162         }
1163         case Iop_64to32: {
1164            HReg rHi, rLo;
1165            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1166            return rLo; /* similar stupid comment to the above ... */
1167         }
1168         case Iop_16HIto8:
1169         case Iop_32HIto16: {
1170            HReg dst  = newVRegI(env);
1171            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1172            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1173            addInstr(env, mk_iMOVsd_RR(src,dst) );
1174            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1175            return dst;
1176         }
1177         case Iop_1Uto32:
1178         case Iop_1Uto8: {
1179            HReg dst         = newVRegI(env);
1180            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1181            addInstr(env, X86Instr_Set32(cond,dst));
1182            return dst;
1183         }
1184         case Iop_1Sto8:
1185         case Iop_1Sto16:
1186         case Iop_1Sto32: {
1187            /* could do better than this, but for now ... */
1188            HReg dst         = newVRegI(env);
1189            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1190            addInstr(env, X86Instr_Set32(cond,dst));
1191            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1192            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1193            return dst;
1194         }
1195         case Iop_Ctz32: {
1196            /* Count trailing zeroes, implemented by x86 'bsfl' */
1197            HReg dst = newVRegI(env);
1198            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1199            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1200            return dst;
1201         }
1202         case Iop_Clz32: {
1203            /* Count leading zeroes.  Do 'bsrl' to establish the index
1204               of the highest set bit, and subtract that value from
1205               31. */
1206            HReg tmp = newVRegI(env);
1207            HReg dst = newVRegI(env);
1208            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1209            addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1210            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1211                                          X86RMI_Imm(31), dst));
1212            addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1213                                          X86RMI_Reg(tmp), dst));
1214            return dst;
1215         }
1216
1217         case Iop_CmpwNEZ32: {
1218            HReg dst = newVRegI(env);
1219            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1220            addInstr(env, mk_iMOVsd_RR(src,dst));
1221            addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1222            addInstr(env, X86Instr_Alu32R(Xalu_OR,
1223                                          X86RMI_Reg(src), dst));
1224            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1225            return dst;
1226         }
1227         case Iop_Left8:
1228         case Iop_Left16:
1229         case Iop_Left32: {
1230            HReg dst = newVRegI(env);
1231            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1232            addInstr(env, mk_iMOVsd_RR(src, dst));
1233            addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1234            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1235            return dst;
1236         }
1237
1238         case Iop_V128to32: {
1239            HReg      dst  = newVRegI(env);
1240            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1241            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1242            sub_from_esp(env, 16);
1243            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1244            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1245            add_to_esp(env, 16);
1246            return dst;
1247         }
1248
1249         /* ReinterpF32asI32(e) */
1250         /* Given an IEEE754 single, produce an I32 with the same bit
1251            pattern.  Keep stack 8-aligned even though only using 4
1252            bytes. */
1253         case Iop_ReinterpF32asI32: {
1254            HReg rf   = iselFltExpr(env, e->Iex.Unop.arg);
1255            HReg dst  = newVRegI(env);
1256            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1257            /* paranoia */
1258            set_FPU_rounding_default(env);
1259            /* subl $8, %esp */
1260            sub_from_esp(env, 8);
1261            /* gstF %rf, 0(%esp) */
1262            addInstr(env,
1263                     X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1264            /* movl 0(%esp), %dst */
1265            addInstr(env,
1266                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1267            /* addl $8, %esp */
1268            add_to_esp(env, 8);
1269            return dst;
1270         }
1271
1272         case Iop_16to8:
1273         case Iop_32to8:
1274         case Iop_32to16:
1275            /* These are no-ops. */
1276            return iselIntExpr_R(env, e->Iex.Unop.arg);
1277
1278         default:
1279            break;
1280      }
1281      break;
1282   }
1283
1284   /* --------- GET --------- */
1285   case Iex_Get: {
1286      if (ty == Ity_I32) {
1287         HReg dst = newVRegI(env);
1288         addInstr(env, X86Instr_Alu32R(
1289                          Xalu_MOV,
1290                          X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1291                                                 hregX86_EBP())),
1292                          dst));
1293         return dst;
1294      }
1295      if (ty == Ity_I8 || ty == Ity_I16) {
1296         HReg dst = newVRegI(env);
1297         addInstr(env, X86Instr_LoadEX(
1298                          toUChar(ty==Ity_I8 ? 1 : 2),
1299                          False,
1300                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1301                          dst));
1302         return dst;
1303      }
1304      break;
1305   }
1306
1307   case Iex_GetI: {
1308      X86AMode* am
1309         = genGuestArrayOffset(
1310              env, e->Iex.GetI.descr,
1311                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1312      HReg dst = newVRegI(env);
1313      if (ty == Ity_I8) {
1314         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1315         return dst;
1316      }
1317      if (ty == Ity_I32) {
1318         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1319         return dst;
1320      }
1321      break;
1322   }
1323
1324   /* --------- CCALL --------- */
1325   case Iex_CCall: {
1326      HReg    dst = newVRegI(env);
1327      vassert(ty == e->Iex.CCall.retty);
1328
1329      /* be very restrictive for now.  Only 32/64-bit ints allowed
1330         for args, and 32 bits for return type. */
1331      if (e->Iex.CCall.retty != Ity_I32)
1332         goto irreducible;
1333
1334      /* Marshal args, do the call, clear stack. */
1335      doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1336
1337      addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1338      return dst;
1339   }
1340
1341   /* --------- LITERAL --------- */
1342   /* 32/16/8-bit literals */
1343   case Iex_Const: {
1344      X86RMI* rmi = iselIntExpr_RMI ( env, e );
1345      HReg    r   = newVRegI(env);
1346      addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1347      return r;
1348   }
1349
1350   /* --------- MULTIPLEX --------- */
1351   case Iex_Mux0X: {
1352     if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1353         && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
1354        X86RM* r8;
1355        HReg   rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1356        X86RM* r0  = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1357        HReg   dst = newVRegI(env);
1358        addInstr(env, mk_iMOVsd_RR(rX,dst));
1359        r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
1360        addInstr(env, X86Instr_Test32(0xFF, r8));
1361        addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst));
1362        return dst;
1363      }
1364      break;
1365   }
1366
1367   default:
1368   break;
1369   } /* switch (e->tag) */
1370
1371   /* We get here if no pattern matched. */
1372  irreducible:
1373   ppIRExpr(e);
1374   vpanic("iselIntExpr_R: cannot reduce tree");
1375}
1376
1377
1378/*---------------------------------------------------------*/
1379/*--- ISEL: Integer expression auxiliaries              ---*/
1380/*---------------------------------------------------------*/
1381
1382/* --------------------- AMODEs --------------------- */
1383
1384/* Return an AMode which computes the value of the specified
1385   expression, possibly also adding insns to the code list as a
1386   result.  The expression may only be a 32-bit one.
1387*/
1388
1389static Bool sane_AMode ( X86AMode* am )
1390{
1391   switch (am->tag) {
1392      case Xam_IR:
1393         return
1394            toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1395                    && (hregIsVirtual(am->Xam.IR.reg)
1396                        || am->Xam.IR.reg == hregX86_EBP()) );
1397      case Xam_IRRS:
1398         return
1399            toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1400                    && hregIsVirtual(am->Xam.IRRS.base)
1401                    && hregClass(am->Xam.IRRS.index) == HRcInt32
1402                    && hregIsVirtual(am->Xam.IRRS.index) );
1403      default:
1404        vpanic("sane_AMode: unknown x86 amode tag");
1405   }
1406}
1407
1408static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1409{
1410   X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1411   vassert(sane_AMode(am));
1412   return am;
1413}
1414
1415/* DO NOT CALL THIS DIRECTLY ! */
1416static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1417{
1418   IRType ty = typeOfIRExpr(env->type_env,e);
1419   vassert(ty == Ity_I32);
1420
1421   /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1422   if (e->tag == Iex_Binop
1423       && e->Iex.Binop.op == Iop_Add32
1424       && e->Iex.Binop.arg2->tag == Iex_Const
1425       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1426       && e->Iex.Binop.arg1->tag == Iex_Binop
1427       && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1428       && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1429       && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1430       && e->Iex.Binop.arg1
1431           ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1432       && e->Iex.Binop.arg1
1433           ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1434      UInt shift = e->Iex.Binop.arg1
1435                    ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1436      UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1437      if (shift == 1 || shift == 2 || shift == 3) {
1438         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1439         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1440                                       ->Iex.Binop.arg2->Iex.Binop.arg1 );
1441         return X86AMode_IRRS(imm32, r1, r2, shift);
1442      }
1443   }
1444
1445   /* Add32(expr1, Shl32(expr2, imm)) */
1446   if (e->tag == Iex_Binop
1447       && e->Iex.Binop.op == Iop_Add32
1448       && e->Iex.Binop.arg2->tag == Iex_Binop
1449       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1450       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1451       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1452      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1453      if (shift == 1 || shift == 2 || shift == 3) {
1454         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1455         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1456         return X86AMode_IRRS(0, r1, r2, shift);
1457      }
1458   }
1459
1460   /* Add32(expr,i) */
1461   if (e->tag == Iex_Binop
1462       && e->Iex.Binop.op == Iop_Add32
1463       && e->Iex.Binop.arg2->tag == Iex_Const
1464       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1465      HReg r1 = iselIntExpr_R(env,  e->Iex.Binop.arg1);
1466      return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1467   }
1468
1469   /* Doesn't match anything in particular.  Generate it into
1470      a register and use that. */
1471   {
1472      HReg r1 = iselIntExpr_R(env, e);
1473      return X86AMode_IR(0, r1);
1474   }
1475}
1476
1477
1478/* --------------------- RMIs --------------------- */
1479
1480/* Similarly, calculate an expression into an X86RMI operand.  As with
1481   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
1482
1483static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1484{
1485   X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1486   /* sanity checks ... */
1487   switch (rmi->tag) {
1488      case Xrmi_Imm:
1489         return rmi;
1490      case Xrmi_Reg:
1491         vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1492         vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1493         return rmi;
1494      case Xrmi_Mem:
1495         vassert(sane_AMode(rmi->Xrmi.Mem.am));
1496         return rmi;
1497      default:
1498         vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1499   }
1500}
1501
1502/* DO NOT CALL THIS DIRECTLY ! */
1503static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1504{
1505   IRType ty = typeOfIRExpr(env->type_env,e);
1506   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1507
1508   /* special case: immediate */
1509   if (e->tag == Iex_Const) {
1510      UInt u;
1511      switch (e->Iex.Const.con->tag) {
1512         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1513         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1514         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1515         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1516      }
1517      return X86RMI_Imm(u);
1518   }
1519
1520   /* special case: 32-bit GET */
1521   if (e->tag == Iex_Get && ty == Ity_I32) {
1522      return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1523                                    hregX86_EBP()));
1524   }
1525
1526   /* special case: 32-bit load from memory */
1527   if (e->tag == Iex_Load && ty == Ity_I32
1528       && e->Iex.Load.end == Iend_LE) {
1529      X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1530      return X86RMI_Mem(am);
1531   }
1532
1533   /* default case: calculate into a register and return that */
1534   {
1535      HReg r = iselIntExpr_R ( env, e );
1536      return X86RMI_Reg(r);
1537   }
1538}
1539
1540
1541/* --------------------- RIs --------------------- */
1542
1543/* Calculate an expression into an X86RI operand.  As with
1544   iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1545
1546static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1547{
1548   X86RI* ri = iselIntExpr_RI_wrk(env, e);
1549   /* sanity checks ... */
1550   switch (ri->tag) {
1551      case Xri_Imm:
1552         return ri;
1553      case Xri_Reg:
1554         vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1555         vassert(hregIsVirtual(ri->Xri.Reg.reg));
1556         return ri;
1557      default:
1558         vpanic("iselIntExpr_RI: unknown x86 RI tag");
1559   }
1560}
1561
1562/* DO NOT CALL THIS DIRECTLY ! */
1563static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
1564{
1565   IRType ty = typeOfIRExpr(env->type_env,e);
1566   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1567
1568   /* special case: immediate */
1569   if (e->tag == Iex_Const) {
1570      UInt u;
1571      switch (e->Iex.Const.con->tag) {
1572         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1573         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1574         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1575         default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1576      }
1577      return X86RI_Imm(u);
1578   }
1579
1580   /* default case: calculate into a register and return that */
1581   {
1582      HReg r = iselIntExpr_R ( env, e );
1583      return X86RI_Reg(r);
1584   }
1585}
1586
1587
1588/* --------------------- RMs --------------------- */
1589
1590/* Similarly, calculate an expression into an X86RM operand.  As with
1591   iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
1592
1593static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
1594{
1595   X86RM* rm = iselIntExpr_RM_wrk(env, e);
1596   /* sanity checks ... */
1597   switch (rm->tag) {
1598      case Xrm_Reg:
1599         vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1600         vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1601         return rm;
1602      case Xrm_Mem:
1603         vassert(sane_AMode(rm->Xrm.Mem.am));
1604         return rm;
1605      default:
1606         vpanic("iselIntExpr_RM: unknown x86 RM tag");
1607   }
1608}
1609
1610/* DO NOT CALL THIS DIRECTLY ! */
1611static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
1612{
1613   IRType ty = typeOfIRExpr(env->type_env,e);
1614   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1615
1616   /* special case: 32-bit GET */
1617   if (e->tag == Iex_Get && ty == Ity_I32) {
1618      return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1619                                   hregX86_EBP()));
1620   }
1621
1622   /* special case: load from memory */
1623
1624   /* default case: calculate into a register and return that */
1625   {
1626      HReg r = iselIntExpr_R ( env, e );
1627      return X86RM_Reg(r);
1628   }
1629}
1630
1631
1632/* --------------------- CONDCODE --------------------- */
1633
1634/* Generate code to evaluated a bit-typed expression, returning the
1635   condition code which would correspond when the expression would
1636   notionally have returned 1. */
1637
1638static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1639{
1640   /* Uh, there's nothing we can sanity check here, unfortunately. */
1641   return iselCondCode_wrk(env,e);
1642}
1643
1644/* DO NOT CALL THIS DIRECTLY ! */
1645static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1646{
1647   MatchInfo mi;
1648
1649   vassert(e);
1650   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1651
1652   /* var */
1653   if (e->tag == Iex_RdTmp) {
1654      HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1655      /* Test32 doesn't modify r32; so this is OK. */
1656      addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1657      return Xcc_NZ;
1658   }
1659
1660   /* Constant 1:Bit */
1661   if (e->tag == Iex_Const) {
1662      HReg r;
1663      vassert(e->Iex.Const.con->tag == Ico_U1);
1664      vassert(e->Iex.Const.con->Ico.U1 == True
1665              || e->Iex.Const.con->Ico.U1 == False);
1666      r = newVRegI(env);
1667      addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1668      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1669      return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1670   }
1671
1672   /* Not1(e) */
1673   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1674      /* Generate code for the arg, and negate the test condition */
1675      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1676   }
1677
1678   /* --- patterns rooted at: 32to1 --- */
1679
1680   if (e->tag == Iex_Unop
1681       && e->Iex.Unop.op == Iop_32to1) {
1682      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1683      addInstr(env, X86Instr_Test32(1,rm));
1684      return Xcc_NZ;
1685   }
1686
1687   /* --- patterns rooted at: CmpNEZ8 --- */
1688
1689   /* CmpNEZ8(x) */
1690   if (e->tag == Iex_Unop
1691       && e->Iex.Unop.op == Iop_CmpNEZ8) {
1692      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1693      addInstr(env, X86Instr_Test32(0xFF,rm));
1694      return Xcc_NZ;
1695   }
1696
1697   /* --- patterns rooted at: CmpNEZ16 --- */
1698
1699   /* CmpNEZ16(x) */
1700   if (e->tag == Iex_Unop
1701       && e->Iex.Unop.op == Iop_CmpNEZ16) {
1702      X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1703      addInstr(env, X86Instr_Test32(0xFFFF,rm));
1704      return Xcc_NZ;
1705   }
1706
1707   /* --- patterns rooted at: CmpNEZ32 --- */
1708
1709   /* CmpNEZ32(And32(x,y)) */
1710   {
1711      DECLARE_PATTERN(p_CmpNEZ32_And32);
1712      DEFINE_PATTERN(p_CmpNEZ32_And32,
1713                     unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1714      if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1715         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1716         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1717         HReg    tmp  = newVRegI(env);
1718         addInstr(env, mk_iMOVsd_RR(r0, tmp));
1719         addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1720         return Xcc_NZ;
1721      }
1722   }
1723
1724   /* CmpNEZ32(Or32(x,y)) */
1725   {
1726      DECLARE_PATTERN(p_CmpNEZ32_Or32);
1727      DEFINE_PATTERN(p_CmpNEZ32_Or32,
1728                     unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1729      if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1730         HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
1731         X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1732         HReg    tmp  = newVRegI(env);
1733         addInstr(env, mk_iMOVsd_RR(r0, tmp));
1734         addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1735         return Xcc_NZ;
1736      }
1737   }
1738
1739   /* CmpNEZ32(GET(..):I32) */
1740   if (e->tag == Iex_Unop
1741       && e->Iex.Unop.op == Iop_CmpNEZ32
1742       && e->Iex.Unop.arg->tag == Iex_Get) {
1743      X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1744                                 hregX86_EBP());
1745      addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1746      return Xcc_NZ;
1747   }
1748
1749   /* CmpNEZ32(x) */
1750   if (e->tag == Iex_Unop
1751       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1752      HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1753      X86RMI* rmi2 = X86RMI_Imm(0);
1754      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1755      return Xcc_NZ;
1756   }
1757
1758   /* --- patterns rooted at: CmpNEZ64 --- */
1759
1760   /* CmpNEZ64(Or64(x,y)) */
1761   {
1762      DECLARE_PATTERN(p_CmpNEZ64_Or64);
1763      DEFINE_PATTERN(p_CmpNEZ64_Or64,
1764                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1765      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1766         HReg    hi1, lo1, hi2, lo2;
1767         HReg    tmp  = newVRegI(env);
1768         iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1769         addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1770         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1771         iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1772         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1773         addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1774         return Xcc_NZ;
1775      }
1776   }
1777
1778   /* CmpNEZ64(x) */
1779   if (e->tag == Iex_Unop
1780       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1781      HReg hi, lo;
1782      HReg tmp = newVRegI(env);
1783      iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1784      addInstr(env, mk_iMOVsd_RR(hi, tmp));
1785      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1786      return Xcc_NZ;
1787   }
1788
1789   /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1790
1791   /* CmpEQ8 / CmpNE8 */
1792   if (e->tag == Iex_Binop
1793       && (e->Iex.Binop.op == Iop_CmpEQ8
1794           || e->Iex.Binop.op == Iop_CmpNE8
1795           || e->Iex.Binop.op == Iop_CasCmpEQ8
1796           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1797      if (isZeroU8(e->Iex.Binop.arg2)) {
1798         HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1799         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1800         switch (e->Iex.Binop.op) {
1801            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1802            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1803            default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1804         }
1805      } else {
1806         HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1807         X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1808         HReg    r    = newVRegI(env);
1809         addInstr(env, mk_iMOVsd_RR(r1,r));
1810         addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1811         addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1812         switch (e->Iex.Binop.op) {
1813            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1814            case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1815            default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1816         }
1817      }
1818   }
1819
1820   /* CmpEQ16 / CmpNE16 */
1821   if (e->tag == Iex_Binop
1822       && (e->Iex.Binop.op == Iop_CmpEQ16
1823           || e->Iex.Binop.op == Iop_CmpNE16
1824           || e->Iex.Binop.op == Iop_CasCmpEQ16
1825           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1826      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1827      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1828      HReg    r    = newVRegI(env);
1829      addInstr(env, mk_iMOVsd_RR(r1,r));
1830      addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1831      addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1832      switch (e->Iex.Binop.op) {
1833         case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
1834         case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
1835         default: vpanic("iselCondCode(x86): CmpXX16");
1836      }
1837   }
1838
1839   /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1840      Saves a "movl %eax, %tmp" compared to the default route. */
1841   if (e->tag == Iex_Binop
1842       && e->Iex.Binop.op == Iop_CmpNE32
1843       && e->Iex.Binop.arg1->tag == Iex_CCall
1844       && e->Iex.Binop.arg2->tag == Iex_Const) {
1845      IRExpr* cal = e->Iex.Binop.arg1;
1846      IRExpr* con = e->Iex.Binop.arg2;
1847      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1848      vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1849      vassert(con->Iex.Const.con->tag == Ico_U32);
1850      /* Marshal args, do the call. */
1851      doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args );
1852      addInstr(env, X86Instr_Alu32R(Xalu_CMP,
1853                                    X86RMI_Imm(con->Iex.Const.con->Ico.U32),
1854                                    hregX86_EAX()));
1855      return Xcc_NZ;
1856   }
1857
1858   /* Cmp*32*(x,y) */
1859   if (e->tag == Iex_Binop
1860       && (e->Iex.Binop.op == Iop_CmpEQ32
1861           || e->Iex.Binop.op == Iop_CmpNE32
1862           || e->Iex.Binop.op == Iop_CmpLT32S
1863           || e->Iex.Binop.op == Iop_CmpLT32U
1864           || e->Iex.Binop.op == Iop_CmpLE32S
1865           || e->Iex.Binop.op == Iop_CmpLE32U
1866           || e->Iex.Binop.op == Iop_CasCmpEQ32
1867           || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1868      HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1869      X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1870      addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1871      switch (e->Iex.Binop.op) {
1872         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
1873         case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ;
1874         case Iop_CmpLT32S: return Xcc_L;
1875         case Iop_CmpLT32U: return Xcc_B;
1876         case Iop_CmpLE32S: return Xcc_LE;
1877         case Iop_CmpLE32U: return Xcc_BE;
1878         default: vpanic("iselCondCode(x86): CmpXX32");
1879      }
1880   }
1881
1882   /* CmpNE64 */
1883   if (e->tag == Iex_Binop
1884       && (e->Iex.Binop.op == Iop_CmpNE64
1885           || e->Iex.Binop.op == Iop_CmpEQ64)) {
1886      HReg hi1, hi2, lo1, lo2;
1887      HReg tHi = newVRegI(env);
1888      HReg tLo = newVRegI(env);
1889      iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
1890      iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
1891      addInstr(env, mk_iMOVsd_RR(hi1, tHi));
1892      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
1893      addInstr(env, mk_iMOVsd_RR(lo1, tLo));
1894      addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
1895      addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
1896      switch (e->Iex.Binop.op) {
1897         case Iop_CmpNE64: return Xcc_NZ;
1898         case Iop_CmpEQ64: return Xcc_Z;
1899         default: vpanic("iselCondCode(x86): CmpXX64");
1900      }
1901   }
1902
1903   ppIRExpr(e);
1904   vpanic("iselCondCode");
1905}
1906
1907
1908/*---------------------------------------------------------*/
1909/*--- ISEL: Integer expressions (64 bit)                ---*/
1910/*---------------------------------------------------------*/
1911
1912/* Compute a 64-bit value into a register pair, which is returned as
1913   the first two parameters.  As with iselIntExpr_R, these may be
1914   either real or virtual regs; in any case they must not be changed
1915   by subsequent code emitted by the caller.  */
1916
1917static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1918{
1919   iselInt64Expr_wrk(rHi, rLo, env, e);
1920#  if 0
1921   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1922#  endif
1923   vassert(hregClass(*rHi) == HRcInt32);
1924   vassert(hregIsVirtual(*rHi));
1925   vassert(hregClass(*rLo) == HRcInt32);
1926   vassert(hregIsVirtual(*rLo));
1927}
1928
1929/* DO NOT CALL THIS DIRECTLY ! */
1930static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1931{
1932   MatchInfo mi;
1933   HWord fn = 0; /* helper fn for most SIMD64 stuff */
1934   vassert(e);
1935   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1936
1937   /* 64-bit literal */
1938   if (e->tag == Iex_Const) {
1939      ULong w64 = e->Iex.Const.con->Ico.U64;
1940      UInt  wHi = toUInt(w64 >> 32);
1941      UInt  wLo = toUInt(w64);
1942      HReg  tLo = newVRegI(env);
1943      HReg  tHi = newVRegI(env);
1944      vassert(e->Iex.Const.con->tag == Ico_U64);
1945      if (wLo == wHi) {
1946         /* Save a precious Int register in this special case. */
1947         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1948         *rHi = tLo;
1949         *rLo = tLo;
1950      } else {
1951         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
1952         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1953         *rHi = tHi;
1954         *rLo = tLo;
1955      }
1956      return;
1957   }
1958
1959   /* read 64-bit IRTemp */
1960   if (e->tag == Iex_RdTmp) {
1961      lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1962      return;
1963   }
1964
1965   /* 64-bit load */
1966   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1967      HReg     tLo, tHi;
1968      X86AMode *am0, *am4;
1969      vassert(e->Iex.Load.ty == Ity_I64);
1970      tLo = newVRegI(env);
1971      tHi = newVRegI(env);
1972      am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
1973      am4 = advance4(am0);
1974      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
1975      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1976      *rHi = tHi;
1977      *rLo = tLo;
1978      return;
1979   }
1980
1981   /* 64-bit GET */
1982   if (e->tag == Iex_Get) {
1983      X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
1984      X86AMode* am4 = advance4(am);
1985      HReg tLo = newVRegI(env);
1986      HReg tHi = newVRegI(env);
1987      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
1988      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1989      *rHi = tHi;
1990      *rLo = tLo;
1991      return;
1992   }
1993
1994   /* 64-bit GETI */
1995   if (e->tag == Iex_GetI) {
1996      X86AMode* am
1997         = genGuestArrayOffset( env, e->Iex.GetI.descr,
1998                                     e->Iex.GetI.ix, e->Iex.GetI.bias );
1999      X86AMode* am4 = advance4(am);
2000      HReg tLo = newVRegI(env);
2001      HReg tHi = newVRegI(env);
2002      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2003      addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2004      *rHi = tHi;
2005      *rLo = tLo;
2006      return;
2007   }
2008
2009   /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */
2010   if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) {
2011      X86RM* r8;
2012      HReg e0Lo, e0Hi;
2013      HReg tLo = newVRegI(env);
2014      HReg tHi = newVRegI(env);
2015      X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2016      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2017      r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2018      addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
2019      addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
2020      addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
2021      addInstr(env, X86Instr_Test32(0xFF, r8));
2022      addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi));
2023      addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo));
2024      add_to_esp(env, 4);
2025      *rHi = tHi;
2026      *rLo = tLo;
2027      return;
2028   }
2029   /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */
2030   if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) {
2031      X86RM* r8;
2032      HReg e0Lo, e0Hi;
2033      HReg tLo = newVRegI(env);
2034      HReg tHi = newVRegI(env);
2035      X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2036      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX);
2037      r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2038      addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
2039      addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
2040      addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
2041      addInstr(env, X86Instr_Test32(0xFF, r8));
2042      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi));
2043      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo));
2044      add_to_esp(env, 4);
2045      *rHi = tHi;
2046      *rLo = tLo;
2047      return;
2048   }
2049
2050   /* 64-bit Mux0X: Mux0X(g, expr, expr) */
2051   if (e->tag == Iex_Mux0X) {
2052      X86RM* r8;
2053      HReg e0Lo, e0Hi, eXLo, eXHi;
2054      HReg tLo = newVRegI(env);
2055      HReg tHi = newVRegI(env);
2056      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2057      iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2058      addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2059      addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2060      r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2061      addInstr(env, X86Instr_Test32(0xFF, r8));
2062      /* This assumes the first cmov32 doesn't trash the condition
2063         codes, so they are still available for the second cmov32 */
2064      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2065      addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2066      *rHi = tHi;
2067      *rLo = tLo;
2068      return;
2069   }
2070
2071   /* --------- BINARY ops --------- */
2072   if (e->tag == Iex_Binop) {
2073      switch (e->Iex.Binop.op) {
2074         /* 32 x 32 -> 64 multiply */
2075         case Iop_MullU32:
2076         case Iop_MullS32: {
2077            /* get one operand into %eax, and the other into a R/M.
2078               Need to make an educated guess about which is better in
2079               which. */
2080            HReg   tLo    = newVRegI(env);
2081            HReg   tHi    = newVRegI(env);
2082            Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
2083            X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2084            HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2085            addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2086            addInstr(env, X86Instr_MulL(syned, rmLeft));
2087            /* Result is now in EDX:EAX.  Tell the caller. */
2088            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2089            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2090            *rHi = tHi;
2091            *rLo = tLo;
2092            return;
2093         }
2094
2095         /* 64 x 32 -> (32(rem),32(div)) division */
2096         case Iop_DivModU64to32:
2097         case Iop_DivModS64to32: {
2098            /* Get the 64-bit operand into edx:eax, and the other into
2099               any old R/M. */
2100            HReg sHi, sLo;
2101            HReg   tLo     = newVRegI(env);
2102            HReg   tHi     = newVRegI(env);
2103            Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2104            X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2105            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2106            addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2107            addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2108            addInstr(env, X86Instr_Div(syned, rmRight));
2109            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2110            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2111            *rHi = tHi;
2112            *rLo = tLo;
2113            return;
2114         }
2115
2116         /* Or64/And64/Xor64 */
2117         case Iop_Or64:
2118         case Iop_And64:
2119         case Iop_Xor64: {
2120            HReg xLo, xHi, yLo, yHi;
2121            HReg tLo = newVRegI(env);
2122            HReg tHi = newVRegI(env);
2123            X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2124                          : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2125                          : Xalu_XOR;
2126            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2127            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2128            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2129            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2130            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2131            addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2132            *rHi = tHi;
2133            *rLo = tLo;
2134            return;
2135         }
2136
2137         /* Add64/Sub64 */
2138         case Iop_Add64:
2139            if (e->Iex.Binop.arg2->tag == Iex_Const) {
2140               /* special case Add64(e, const) */
2141               ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2142               UInt  wHi = toUInt(w64 >> 32);
2143               UInt  wLo = toUInt(w64);
2144               HReg  tLo = newVRegI(env);
2145               HReg  tHi = newVRegI(env);
2146               HReg  xLo, xHi;
2147               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2148               iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2149               addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2150               addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2151               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2152               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2153               *rHi = tHi;
2154               *rLo = tLo;
2155               return;
2156            }
2157            /* else fall through to the generic case */
2158         case Iop_Sub64: {
2159            HReg xLo, xHi, yLo, yHi;
2160            HReg tLo = newVRegI(env);
2161            HReg tHi = newVRegI(env);
2162            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2163            addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2164            addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2165            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2166            if (e->Iex.Binop.op==Iop_Add64) {
2167               addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2168               addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2169            } else {
2170               addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2171               addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2172            }
2173            *rHi = tHi;
2174            *rLo = tLo;
2175            return;
2176         }
2177
2178         /* 32HLto64(e1,e2) */
2179         case Iop_32HLto64:
2180            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2181            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2182            return;
2183
2184         /* 64-bit shifts */
2185         case Iop_Shl64: {
2186            /* We use the same ingenious scheme as gcc.  Put the value
2187               to be shifted into %hi:%lo, and the shift amount into
2188               %cl.  Then (dsts on right, a la ATT syntax):
2189
2190               shldl %cl, %lo, %hi   -- make %hi be right for the
2191                                     -- shift amt %cl % 32
2192               shll  %cl, %lo        -- make %lo be right for the
2193                                     -- shift amt %cl % 32
2194
2195               Now, if (shift amount % 64) is in the range 32 .. 63,
2196               we have to do a fixup, which puts the result low half
2197               into the result high half, and zeroes the low half:
2198
2199               testl $32, %ecx
2200
2201               cmovnz %lo, %hi
2202               movl $0, %tmp         -- sigh; need yet another reg
2203               cmovnz %tmp, %lo
2204            */
2205            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2206            tLo = newVRegI(env);
2207            tHi = newVRegI(env);
2208            tTemp = newVRegI(env);
2209            rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2210            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2211            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2212            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2213            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2214            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2215               and those regs are legitimately modifiable. */
2216            addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2217            addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2218            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2219            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2220            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2221            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2222            *rHi = tHi;
2223            *rLo = tLo;
2224            return;
2225         }
2226
2227         case Iop_Shr64: {
2228            /* We use the same ingenious scheme as gcc.  Put the value
2229               to be shifted into %hi:%lo, and the shift amount into
2230               %cl.  Then:
2231
2232               shrdl %cl, %hi, %lo   -- make %lo be right for the
2233                                     -- shift amt %cl % 32
2234               shrl  %cl, %hi        -- make %hi be right for the
2235                                     -- shift amt %cl % 32
2236
2237               Now, if (shift amount % 64) is in the range 32 .. 63,
2238               we have to do a fixup, which puts the result high half
2239               into the result low half, and zeroes the high half:
2240
2241               testl $32, %ecx
2242
2243               cmovnz %hi, %lo
2244               movl $0, %tmp         -- sigh; need yet another reg
2245               cmovnz %tmp, %hi
2246            */
2247            HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2248            tLo = newVRegI(env);
2249            tHi = newVRegI(env);
2250            tTemp = newVRegI(env);
2251            rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2252            iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2253            addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2254            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2255            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2256            /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2257               and those regs are legitimately modifiable. */
2258            addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2259            addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2260            addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2261            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2262            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2263            addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2264            *rHi = tHi;
2265            *rLo = tLo;
2266            return;
2267         }
2268
2269         /* F64 -> I64 */
2270         /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2271            case.  Unfortunately I see no easy way to avoid the
2272            duplication. */
2273         case Iop_F64toI64S: {
2274            HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
2275            HReg tLo = newVRegI(env);
2276            HReg tHi = newVRegI(env);
2277
2278            /* Used several times ... */
2279            /* Careful ... this sharing is only safe because
2280	       zero_esp/four_esp do not hold any registers which the
2281	       register allocator could attempt to swizzle later. */
2282            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2283            X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2284
2285            /* rf now holds the value to be converted, and rrm holds
2286               the rounding mode value, encoded as per the
2287               IRRoundingMode enum.  The first thing to do is set the
2288               FPU's rounding mode accordingly. */
2289
2290            /* Create a space for the format conversion. */
2291            /* subl $8, %esp */
2292            sub_from_esp(env, 8);
2293
2294            /* Set host rounding mode */
2295            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2296
2297            /* gistll %rf, 0(%esp) */
2298            addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2299
2300            /* movl 0(%esp), %dstLo */
2301            /* movl 4(%esp), %dstHi */
2302            addInstr(env, X86Instr_Alu32R(
2303                             Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2304            addInstr(env, X86Instr_Alu32R(
2305                             Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2306
2307            /* Restore default FPU rounding. */
2308            set_FPU_rounding_default( env );
2309
2310            /* addl $8, %esp */
2311            add_to_esp(env, 8);
2312
2313            *rHi = tHi;
2314            *rLo = tLo;
2315            return;
2316         }
2317
2318         case Iop_Add8x8:
2319            fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2320         case Iop_Add16x4:
2321            fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2322         case Iop_Add32x2:
2323            fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2324
2325         case Iop_Avg8Ux8:
2326            fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2327         case Iop_Avg16Ux4:
2328            fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2329
2330         case Iop_CmpEQ8x8:
2331            fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2332         case Iop_CmpEQ16x4:
2333            fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2334         case Iop_CmpEQ32x2:
2335            fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2336
2337         case Iop_CmpGT8Sx8:
2338            fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2339         case Iop_CmpGT16Sx4:
2340            fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2341         case Iop_CmpGT32Sx2:
2342            fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2343
2344         case Iop_InterleaveHI8x8:
2345            fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2346         case Iop_InterleaveLO8x8:
2347            fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2348         case Iop_InterleaveHI16x4:
2349            fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2350         case Iop_InterleaveLO16x4:
2351            fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2352         case Iop_InterleaveHI32x2:
2353            fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2354         case Iop_InterleaveLO32x2:
2355            fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2356         case Iop_CatOddLanes16x4:
2357            fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2358         case Iop_CatEvenLanes16x4:
2359            fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2360         case Iop_Perm8x8:
2361            fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2362
2363         case Iop_Max8Ux8:
2364            fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2365         case Iop_Max16Sx4:
2366            fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2367         case Iop_Min8Ux8:
2368            fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2369         case Iop_Min16Sx4:
2370            fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2371
2372         case Iop_Mul16x4:
2373            fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2374         case Iop_Mul32x2:
2375            fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2376         case Iop_MulHi16Sx4:
2377            fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2378         case Iop_MulHi16Ux4:
2379            fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2380
2381         case Iop_QAdd8Sx8:
2382            fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2383         case Iop_QAdd16Sx4:
2384            fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2385         case Iop_QAdd8Ux8:
2386            fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2387         case Iop_QAdd16Ux4:
2388            fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2389
2390         case Iop_QNarrowBin32Sto16Sx4:
2391            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2392         case Iop_QNarrowBin16Sto8Sx8:
2393            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2394         case Iop_QNarrowBin16Sto8Ux8:
2395            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2396         case Iop_NarrowBin16to8x8:
2397            fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2398         case Iop_NarrowBin32to16x4:
2399            fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2400
2401         case Iop_QSub8Sx8:
2402            fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2403         case Iop_QSub16Sx4:
2404            fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2405         case Iop_QSub8Ux8:
2406            fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2407         case Iop_QSub16Ux4:
2408            fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2409
2410         case Iop_Sub8x8:
2411            fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2412         case Iop_Sub16x4:
2413            fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2414         case Iop_Sub32x2:
2415            fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2416
2417         binnish: {
2418            /* Note: the following assumes all helpers are of
2419               signature
2420                  ULong fn ( ULong, ULong ), and they are
2421               not marked as regparm functions.
2422            */
2423            HReg xLo, xHi, yLo, yHi;
2424            HReg tLo = newVRegI(env);
2425            HReg tHi = newVRegI(env);
2426            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2427            addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2428            addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2429            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2430            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2431            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2432            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2433            add_to_esp(env, 4*4);
2434            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2435            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2436            *rHi = tHi;
2437            *rLo = tLo;
2438            return;
2439         }
2440
2441         case Iop_ShlN32x2:
2442            fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2443         case Iop_ShlN16x4:
2444            fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2445         case Iop_ShlN8x8:
2446            fn = (HWord)h_generic_calc_ShlN8x8;  goto shifty;
2447         case Iop_ShrN32x2:
2448            fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2449         case Iop_ShrN16x4:
2450            fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2451         case Iop_SarN32x2:
2452            fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2453         case Iop_SarN16x4:
2454            fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2455         case Iop_SarN8x8:
2456            fn = (HWord)h_generic_calc_SarN8x8;  goto shifty;
2457         shifty: {
2458            /* Note: the following assumes all helpers are of
2459               signature
2460                  ULong fn ( ULong, UInt ), and they are
2461               not marked as regparm functions.
2462            */
2463            HReg xLo, xHi;
2464            HReg tLo = newVRegI(env);
2465            HReg tHi = newVRegI(env);
2466            X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2467            addInstr(env, X86Instr_Push(y));
2468            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2469            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2470            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2471            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2472            add_to_esp(env, 3*4);
2473            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2474            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2475            *rHi = tHi;
2476            *rLo = tLo;
2477            return;
2478         }
2479
2480         default:
2481            break;
2482      }
2483   } /* if (e->tag == Iex_Binop) */
2484
2485
2486   /* --------- UNARY ops --------- */
2487   if (e->tag == Iex_Unop) {
2488      switch (e->Iex.Unop.op) {
2489
2490         /* 32Sto64(e) */
2491         case Iop_32Sto64: {
2492            HReg tLo = newVRegI(env);
2493            HReg tHi = newVRegI(env);
2494            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2495            addInstr(env, mk_iMOVsd_RR(src,tHi));
2496            addInstr(env, mk_iMOVsd_RR(src,tLo));
2497            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2498            *rHi = tHi;
2499            *rLo = tLo;
2500            return;
2501         }
2502
2503         /* 32Uto64(e) */
2504         case Iop_32Uto64: {
2505            HReg tLo = newVRegI(env);
2506            HReg tHi = newVRegI(env);
2507            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2508            addInstr(env, mk_iMOVsd_RR(src,tLo));
2509            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2510            *rHi = tHi;
2511            *rLo = tLo;
2512            return;
2513         }
2514
2515         /* 16Uto64(e) */
2516         case Iop_16Uto64: {
2517            HReg tLo = newVRegI(env);
2518            HReg tHi = newVRegI(env);
2519            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2520            addInstr(env, mk_iMOVsd_RR(src,tLo));
2521            addInstr(env, X86Instr_Alu32R(Xalu_AND,
2522                                          X86RMI_Imm(0xFFFF), tLo));
2523            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2524            *rHi = tHi;
2525            *rLo = tLo;
2526            return;
2527         }
2528
2529         /* V128{HI}to64 */
2530         case Iop_V128HIto64:
2531         case Iop_V128to64: {
2532            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2533            HReg tLo = newVRegI(env);
2534            HReg tHi = newVRegI(env);
2535            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2536            X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
2537            X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
2538            X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2539            sub_from_esp(env, 16);
2540            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2541            addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2542                                           X86RMI_Mem(espLO), tLo ));
2543            addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2544                                           X86RMI_Mem(espHI), tHi ));
2545            add_to_esp(env, 16);
2546            *rHi = tHi;
2547            *rLo = tLo;
2548            return;
2549         }
2550
2551         /* could do better than this, but for now ... */
2552         case Iop_1Sto64: {
2553            HReg tLo = newVRegI(env);
2554            HReg tHi = newVRegI(env);
2555            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2556            addInstr(env, X86Instr_Set32(cond,tLo));
2557            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2558            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2559            addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2560            *rHi = tHi;
2561            *rLo = tLo;
2562            return;
2563         }
2564
2565         /* Not64(e) */
2566         case Iop_Not64: {
2567            HReg tLo = newVRegI(env);
2568            HReg tHi = newVRegI(env);
2569            HReg sHi, sLo;
2570            iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2571            addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2572            addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2573            addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2574            addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2575            *rHi = tHi;
2576            *rLo = tLo;
2577            return;
2578         }
2579
2580         /* Left64(e) */
2581         case Iop_Left64: {
2582            HReg yLo, yHi;
2583            HReg tLo = newVRegI(env);
2584            HReg tHi = newVRegI(env);
2585            /* yHi:yLo = arg */
2586            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2587            /* tLo = 0 - yLo, and set carry */
2588            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2589            addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2590            /* tHi = 0 - yHi - carry */
2591            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2592            addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2593            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2594               back in, so as to give the final result
2595               tHi:tLo = arg | -arg. */
2596            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2597            addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2598            *rHi = tHi;
2599            *rLo = tLo;
2600            return;
2601         }
2602
2603         /* --- patterns rooted at: CmpwNEZ64 --- */
2604
2605         /* CmpwNEZ64(e) */
2606         case Iop_CmpwNEZ64: {
2607
2608         DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2609         DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2610                        unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2611         if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2612            /* CmpwNEZ64(Or64(x,y)) */
2613            HReg xHi,xLo,yHi,yLo;
2614            HReg xBoth = newVRegI(env);
2615            HReg merged = newVRegI(env);
2616            HReg tmp2 = newVRegI(env);
2617
2618            iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2619            addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2620            addInstr(env, X86Instr_Alu32R(Xalu_OR,
2621                                          X86RMI_Reg(xLo),xBoth));
2622
2623            iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2624            addInstr(env, mk_iMOVsd_RR(yHi,merged));
2625            addInstr(env, X86Instr_Alu32R(Xalu_OR,
2626                                          X86RMI_Reg(yLo),merged));
2627            addInstr(env, X86Instr_Alu32R(Xalu_OR,
2628                                             X86RMI_Reg(xBoth),merged));
2629
2630            /* tmp2 = (merged | -merged) >>s 31 */
2631            addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2632            addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2633            addInstr(env, X86Instr_Alu32R(Xalu_OR,
2634                                          X86RMI_Reg(merged), tmp2));
2635            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2636            *rHi = tmp2;
2637            *rLo = tmp2;
2638            return;
2639         } else {
2640            /* CmpwNEZ64(e) */
2641            HReg srcLo, srcHi;
2642            HReg tmp1  = newVRegI(env);
2643            HReg tmp2  = newVRegI(env);
2644            /* srcHi:srcLo = arg */
2645            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2646            /* tmp1 = srcHi | srcLo */
2647            addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2648            addInstr(env, X86Instr_Alu32R(Xalu_OR,
2649                                          X86RMI_Reg(srcLo), tmp1));
2650            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2651            addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2652            addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2653            addInstr(env, X86Instr_Alu32R(Xalu_OR,
2654                                          X86RMI_Reg(tmp1), tmp2));
2655            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2656            *rHi = tmp2;
2657            *rLo = tmp2;
2658            return;
2659         }
2660         }
2661
2662         /* ReinterpF64asI64(e) */
2663         /* Given an IEEE754 double, produce an I64 with the same bit
2664            pattern. */
2665         case Iop_ReinterpF64asI64: {
2666            HReg rf   = iselDblExpr(env, e->Iex.Unop.arg);
2667            HReg tLo  = newVRegI(env);
2668            HReg tHi  = newVRegI(env);
2669            X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2670            X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2671            /* paranoia */
2672            set_FPU_rounding_default(env);
2673            /* subl $8, %esp */
2674            sub_from_esp(env, 8);
2675            /* gstD %rf, 0(%esp) */
2676            addInstr(env,
2677                     X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2678            /* movl 0(%esp), %tLo */
2679            addInstr(env,
2680                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2681            /* movl 4(%esp), %tHi */
2682            addInstr(env,
2683                     X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2684            /* addl $8, %esp */
2685            add_to_esp(env, 8);
2686            *rHi = tHi;
2687            *rLo = tLo;
2688            return;
2689         }
2690
2691         case Iop_CmpNEZ32x2:
2692            fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2693         case Iop_CmpNEZ16x4:
2694            fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2695         case Iop_CmpNEZ8x8:
2696            fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2697         unish: {
2698            /* Note: the following assumes all helpers are of
2699               signature
2700                  ULong fn ( ULong ), and they are
2701               not marked as regparm functions.
2702            */
2703            HReg xLo, xHi;
2704            HReg tLo = newVRegI(env);
2705            HReg tHi = newVRegI(env);
2706            iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2707            addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2708            addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2709            addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2710            add_to_esp(env, 2*4);
2711            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2712            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2713            *rHi = tHi;
2714            *rLo = tLo;
2715            return;
2716         }
2717
2718         default:
2719            break;
2720      }
2721   } /* if (e->tag == Iex_Unop) */
2722
2723
2724   /* --------- CCALL --------- */
2725   if (e->tag == Iex_CCall) {
2726      HReg tLo = newVRegI(env);
2727      HReg tHi = newVRegI(env);
2728
2729      /* Marshal args, do the call, clear stack. */
2730      doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2731
2732      addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2733      addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2734      *rHi = tHi;
2735      *rLo = tLo;
2736      return;
2737   }
2738
2739   ppIRExpr(e);
2740   vpanic("iselInt64Expr");
2741}
2742
2743
2744/*---------------------------------------------------------*/
2745/*--- ISEL: Floating point expressions (32 bit)         ---*/
2746/*---------------------------------------------------------*/
2747
2748/* Nothing interesting here; really just wrappers for
2749   64-bit stuff. */
2750
2751static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2752{
2753   HReg r = iselFltExpr_wrk( env, e );
2754#  if 0
2755   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2756#  endif
2757   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2758   vassert(hregIsVirtual(r));
2759   return r;
2760}
2761
2762/* DO NOT CALL THIS DIRECTLY */
2763static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2764{
2765   IRType ty = typeOfIRExpr(env->type_env,e);
2766   vassert(ty == Ity_F32);
2767
2768   if (e->tag == Iex_RdTmp) {
2769      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2770   }
2771
2772   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2773      X86AMode* am;
2774      HReg res = newVRegF(env);
2775      vassert(e->Iex.Load.ty == Ity_F32);
2776      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2777      addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2778      return res;
2779   }
2780
2781   if (e->tag == Iex_Binop
2782       && e->Iex.Binop.op == Iop_F64toF32) {
2783      /* Although the result is still held in a standard FPU register,
2784         we need to round it to reflect the loss of accuracy/range
2785         entailed in casting it to a 32-bit float. */
2786      HReg dst = newVRegF(env);
2787      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2788      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2789      addInstr(env, X86Instr_Fp64to32(src,dst));
2790      set_FPU_rounding_default( env );
2791      return dst;
2792   }
2793
2794   if (e->tag == Iex_Get) {
2795      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2796                                  hregX86_EBP() );
2797      HReg res = newVRegF(env);
2798      addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2799      return res;
2800   }
2801
2802   if (e->tag == Iex_Unop
2803       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2804       /* Given an I32, produce an IEEE754 float with the same bit
2805          pattern. */
2806      HReg    dst = newVRegF(env);
2807      X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2808      /* paranoia */
2809      addInstr(env, X86Instr_Push(rmi));
2810      addInstr(env, X86Instr_FpLdSt(
2811                       True/*load*/, 4, dst,
2812                       X86AMode_IR(0, hregX86_ESP())));
2813      add_to_esp(env, 4);
2814      return dst;
2815   }
2816
2817   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2818      HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
2819      HReg dst = newVRegF(env);
2820
2821      /* rf now holds the value to be rounded.  The first thing to do
2822         is set the FPU's rounding mode accordingly. */
2823
2824      /* Set host rounding mode */
2825      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2826
2827      /* grndint %rf, %dst */
2828      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2829
2830      /* Restore default FPU rounding. */
2831      set_FPU_rounding_default( env );
2832
2833      return dst;
2834   }
2835
2836   ppIRExpr(e);
2837   vpanic("iselFltExpr_wrk");
2838}
2839
2840
2841/*---------------------------------------------------------*/
2842/*--- ISEL: Floating point expressions (64 bit)         ---*/
2843/*---------------------------------------------------------*/
2844
2845/* Compute a 64-bit floating point value into a register, the identity
2846   of which is returned.  As with iselIntExpr_R, the reg may be either
2847   real or virtual; in any case it must not be changed by subsequent
2848   code emitted by the caller.  */
2849
2850/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2851
2852    Type                  S (1 bit)   E (11 bits)   F (52 bits)
2853    ----                  ---------   -----------   -----------
2854    signalling NaN        u           2047 (max)    .0uuuuu---u
2855                                                    (with at least
2856                                                     one 1 bit)
2857    quiet NaN             u           2047 (max)    .1uuuuu---u
2858
2859    negative infinity     1           2047 (max)    .000000---0
2860
2861    positive infinity     0           2047 (max)    .000000---0
2862
2863    negative zero         1           0             .000000---0
2864
2865    positive zero         0           0             .000000---0
2866*/
2867
2868static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2869{
2870   HReg r = iselDblExpr_wrk( env, e );
2871#  if 0
2872   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2873#  endif
2874   vassert(hregClass(r) == HRcFlt64);
2875   vassert(hregIsVirtual(r));
2876   return r;
2877}
2878
2879/* DO NOT CALL THIS DIRECTLY */
2880static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2881{
2882   IRType ty = typeOfIRExpr(env->type_env,e);
2883   vassert(e);
2884   vassert(ty == Ity_F64);
2885
2886   if (e->tag == Iex_RdTmp) {
2887      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2888   }
2889
2890   if (e->tag == Iex_Const) {
2891      union { UInt u32x2[2]; ULong u64; Double f64; } u;
2892      HReg freg = newVRegF(env);
2893      vassert(sizeof(u) == 8);
2894      vassert(sizeof(u.u64) == 8);
2895      vassert(sizeof(u.f64) == 8);
2896      vassert(sizeof(u.u32x2) == 8);
2897
2898      if (e->Iex.Const.con->tag == Ico_F64) {
2899         u.f64 = e->Iex.Const.con->Ico.F64;
2900      }
2901      else if (e->Iex.Const.con->tag == Ico_F64i) {
2902         u.u64 = e->Iex.Const.con->Ico.F64i;
2903      }
2904      else
2905         vpanic("iselDblExpr(x86): const");
2906
2907      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
2908      addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
2909      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
2910                                    X86AMode_IR(0, hregX86_ESP())));
2911      add_to_esp(env, 8);
2912      return freg;
2913   }
2914
2915   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2916      X86AMode* am;
2917      HReg res = newVRegF(env);
2918      vassert(e->Iex.Load.ty == Ity_F64);
2919      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2920      addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
2921      return res;
2922   }
2923
2924   if (e->tag == Iex_Get) {
2925      X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2926                                  hregX86_EBP() );
2927      HReg res = newVRegF(env);
2928      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
2929      return res;
2930   }
2931
2932   if (e->tag == Iex_GetI) {
2933      X86AMode* am
2934         = genGuestArrayOffset(
2935              env, e->Iex.GetI.descr,
2936                   e->Iex.GetI.ix, e->Iex.GetI.bias );
2937      HReg res = newVRegF(env);
2938      addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
2939      return res;
2940   }
2941
2942   if (e->tag == Iex_Triop) {
2943      X86FpOp fpop = Xfp_INVALID;
2944      switch (e->Iex.Triop.op) {
2945         case Iop_AddF64:    fpop = Xfp_ADD; break;
2946         case Iop_SubF64:    fpop = Xfp_SUB; break;
2947         case Iop_MulF64:    fpop = Xfp_MUL; break;
2948         case Iop_DivF64:    fpop = Xfp_DIV; break;
2949         case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
2950         case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
2951         case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
2952         case Iop_AtanF64:   fpop = Xfp_ATAN; break;
2953         case Iop_PRemF64:   fpop = Xfp_PREM; break;
2954         case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
2955         default: break;
2956      }
2957      if (fpop != Xfp_INVALID) {
2958         HReg res  = newVRegF(env);
2959         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
2960         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
2961         /* XXXROUNDINGFIXME */
2962         /* set roundingmode here */
2963         addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
2964	 if (fpop != Xfp_ADD && fpop != Xfp_SUB
2965	     && fpop != Xfp_MUL && fpop != Xfp_DIV)
2966            roundToF64(env, res);
2967         return res;
2968      }
2969   }
2970
2971   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
2972      HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
2973      HReg dst = newVRegF(env);
2974
2975      /* rf now holds the value to be rounded.  The first thing to do
2976         is set the FPU's rounding mode accordingly. */
2977
2978      /* Set host rounding mode */
2979      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2980
2981      /* grndint %rf, %dst */
2982      addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2983
2984      /* Restore default FPU rounding. */
2985      set_FPU_rounding_default( env );
2986
2987      return dst;
2988   }
2989
2990   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
2991      HReg dst = newVRegF(env);
2992      HReg rHi,rLo;
2993      iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
2994      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
2995      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
2996
2997      /* Set host rounding mode */
2998      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2999
3000      addInstr(env, X86Instr_FpLdStI(
3001                       True/*load*/, 8, dst,
3002                       X86AMode_IR(0, hregX86_ESP())));
3003
3004      /* Restore default FPU rounding. */
3005      set_FPU_rounding_default( env );
3006
3007      add_to_esp(env, 8);
3008      return dst;
3009   }
3010
3011   if (e->tag == Iex_Binop) {
3012      X86FpOp fpop = Xfp_INVALID;
3013      switch (e->Iex.Binop.op) {
3014         case Iop_SinF64:  fpop = Xfp_SIN; break;
3015         case Iop_CosF64:  fpop = Xfp_COS; break;
3016         case Iop_TanF64:  fpop = Xfp_TAN; break;
3017         case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3018         case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3019         default: break;
3020      }
3021      if (fpop != Xfp_INVALID) {
3022         HReg res = newVRegF(env);
3023         HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3024         /* XXXROUNDINGFIXME */
3025         /* set roundingmode here */
3026         addInstr(env, X86Instr_FpUnary(fpop,src,res));
3027	 if (fpop != Xfp_SQRT
3028             && fpop != Xfp_NEG && fpop != Xfp_ABS)
3029            roundToF64(env, res);
3030         return res;
3031      }
3032   }
3033
3034   if (e->tag == Iex_Unop) {
3035      X86FpOp fpop = Xfp_INVALID;
3036      switch (e->Iex.Unop.op) {
3037         case Iop_NegF64:  fpop = Xfp_NEG; break;
3038         case Iop_AbsF64:  fpop = Xfp_ABS; break;
3039         default: break;
3040      }
3041      if (fpop != Xfp_INVALID) {
3042         HReg res = newVRegF(env);
3043         HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3044         addInstr(env, X86Instr_FpUnary(fpop,src,res));
3045	 if (fpop != Xfp_NEG && fpop != Xfp_ABS)
3046            roundToF64(env, res);
3047         return res;
3048      }
3049   }
3050
3051   if (e->tag == Iex_Unop) {
3052      switch (e->Iex.Unop.op) {
3053         case Iop_I32StoF64: {
3054            HReg dst = newVRegF(env);
3055            HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
3056            addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3057            set_FPU_rounding_default(env);
3058            addInstr(env, X86Instr_FpLdStI(
3059                             True/*load*/, 4, dst,
3060                             X86AMode_IR(0, hregX86_ESP())));
3061	    add_to_esp(env, 4);
3062            return dst;
3063         }
3064         case Iop_ReinterpI64asF64: {
3065            /* Given an I64, produce an IEEE754 double with the same
3066               bit pattern. */
3067            HReg dst = newVRegF(env);
3068            HReg rHi, rLo;
3069	    iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3070            /* paranoia */
3071            set_FPU_rounding_default(env);
3072            addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3073            addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3074            addInstr(env, X86Instr_FpLdSt(
3075                             True/*load*/, 8, dst,
3076                             X86AMode_IR(0, hregX86_ESP())));
3077	    add_to_esp(env, 8);
3078            return dst;
3079	 }
3080         case Iop_F32toF64: {
3081            /* this is a no-op */
3082            HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3083            return res;
3084	 }
3085         default:
3086            break;
3087      }
3088   }
3089
3090   /* --------- MULTIPLEX --------- */
3091   if (e->tag == Iex_Mux0X) {
3092     if (ty == Ity_F64
3093         && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
3094        X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
3095        HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
3096        HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
3097        HReg dst = newVRegF(env);
3098        addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
3099        addInstr(env, X86Instr_Test32(0xFF, r8));
3100        addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));
3101        return dst;
3102      }
3103   }
3104
3105   ppIRExpr(e);
3106   vpanic("iselDblExpr_wrk");
3107}
3108
3109
3110/*---------------------------------------------------------*/
3111/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
3112/*---------------------------------------------------------*/
3113
3114static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3115{
3116   HReg r = iselVecExpr_wrk( env, e );
3117#  if 0
3118   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3119#  endif
3120   vassert(hregClass(r) == HRcVec128);
3121   vassert(hregIsVirtual(r));
3122   return r;
3123}
3124
3125
3126/* DO NOT CALL THIS DIRECTLY */
3127static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3128{
3129
3130#  define REQUIRE_SSE1                                    \
3131      do { if (env->hwcaps == 0/*baseline, no sse*/)      \
3132              goto vec_fail;                              \
3133      } while (0)
3134
3135#  define REQUIRE_SSE2                                    \
3136      do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
3137              goto vec_fail;                              \
3138      } while (0)
3139
3140#  define SSE2_OR_ABOVE                                   \
3141       (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3142
3143   HWord     fn = 0; /* address of helper fn, if required */
3144   MatchInfo mi;
3145   Bool      arg1isEReg = False;
3146   X86SseOp  op = Xsse_INVALID;
3147   IRType    ty = typeOfIRExpr(env->type_env,e);
3148   vassert(e);
3149   vassert(ty == Ity_V128);
3150
3151   REQUIRE_SSE1;
3152
3153   if (e->tag == Iex_RdTmp) {
3154      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3155   }
3156
3157   if (e->tag == Iex_Get) {
3158      HReg dst = newVRegV(env);
3159      addInstr(env, X86Instr_SseLdSt(
3160                       True/*load*/,
3161                       dst,
3162                       X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3163                    )
3164              );
3165      return dst;
3166   }
3167
3168   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3169      HReg      dst = newVRegV(env);
3170      X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
3171      addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3172      return dst;
3173   }
3174
3175   if (e->tag == Iex_Const) {
3176      HReg dst = newVRegV(env);
3177      vassert(e->Iex.Const.con->tag == Ico_V128);
3178      addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3179      return dst;
3180   }
3181
3182   if (e->tag == Iex_Unop) {
3183
3184   if (SSE2_OR_ABOVE) {
3185      /* 64UtoV128(LDle:I64(addr)) */
3186      DECLARE_PATTERN(p_zwiden_load64);
3187      DEFINE_PATTERN(p_zwiden_load64,
3188                     unop(Iop_64UtoV128,
3189                          IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3190      if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3191         X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3192         HReg dst = newVRegV(env);
3193         addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3194         return dst;
3195      }
3196   }
3197
3198   switch (e->Iex.Unop.op) {
3199
3200      case Iop_NotV128: {
3201         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3202         return do_sse_Not128(env, arg);
3203      }
3204
3205      case Iop_CmpNEZ64x2: {
3206         /* We can use SSE2 instructions for this. */
3207         /* Ideally, we want to do a 64Ix2 comparison against zero of
3208            the operand.  Problem is no such insn exists.  Solution
3209            therefore is to do a 32Ix4 comparison instead, and bitwise-
3210            negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
3211            let the not'd result of this initial comparison be a:b:c:d.
3212            What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
3213            pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3214            giving the required result.
3215
3216            The required selection sequence is 2,3,0,1, which
3217            according to Intel's documentation means the pshufd
3218            literal value is 0xB1, that is,
3219            (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3220         */
3221         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
3222         HReg tmp  = newVRegV(env);
3223         HReg dst  = newVRegV(env);
3224         REQUIRE_SSE2;
3225         addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3226         addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3227         tmp = do_sse_Not128(env, tmp);
3228         addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3229         addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3230         return dst;
3231      }
3232
3233      case Iop_CmpNEZ32x4: {
3234         /* Sigh, we have to generate lousy code since this has to
3235            work on SSE1 hosts */
3236         /* basically, the idea is: for each lane:
3237               movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
3238               sbbl %r, %r               (now %r = 1Sto32(CF))
3239               movl %r, lane
3240         */
3241         Int       i;
3242         X86AMode* am;
3243         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3244         HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
3245         HReg      dst  = newVRegV(env);
3246         HReg      r32  = newVRegI(env);
3247         sub_from_esp(env, 16);
3248         addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3249         for (i = 0; i < 4; i++) {
3250            am = X86AMode_IR(i*4, hregX86_ESP());
3251            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3252            addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3253            addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3254            addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3255         }
3256         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3257         add_to_esp(env, 16);
3258         return dst;
3259      }
3260
3261      case Iop_CmpNEZ8x16:
3262      case Iop_CmpNEZ16x8: {
3263         /* We can use SSE2 instructions for this. */
3264         HReg arg;
3265         HReg vec0 = newVRegV(env);
3266         HReg vec1 = newVRegV(env);
3267         HReg dst  = newVRegV(env);
3268         X86SseOp cmpOp
3269            = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3270                                             : Xsse_CMPEQ8;
3271         REQUIRE_SSE2;
3272         addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3273         addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3274         addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3275         /* defer arg computation to here so as to give CMPEQF as long
3276            as possible to complete */
3277         arg = iselVecExpr(env, e->Iex.Unop.arg);
3278         /* vec0 is all 0s; vec1 is all 1s */
3279         addInstr(env, mk_vMOVsd_RR(arg, dst));
3280         /* 16x8 or 8x16 comparison == */
3281         addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3282         /* invert result */
3283         addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3284         return dst;
3285      }
3286
3287      case Iop_Recip32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
3288      case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3289      case Iop_Sqrt32Fx4:  op = Xsse_SQRTF;  goto do_32Fx4_unary;
3290      do_32Fx4_unary:
3291      {
3292         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3293         HReg dst = newVRegV(env);
3294         addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3295         return dst;
3296      }
3297
3298      case Iop_Recip64Fx2: op = Xsse_RCPF;   goto do_64Fx2_unary;
3299      case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
3300      case Iop_Sqrt64Fx2:  op = Xsse_SQRTF;  goto do_64Fx2_unary;
3301      do_64Fx2_unary:
3302      {
3303         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3304         HReg dst = newVRegV(env);
3305         REQUIRE_SSE2;
3306         addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
3307         return dst;
3308      }
3309
3310      case Iop_Recip32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
3311      case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3312      case Iop_Sqrt32F0x4:  op = Xsse_SQRTF;  goto do_32F0x4_unary;
3313      do_32F0x4_unary:
3314      {
3315         /* A bit subtle.  We have to copy the arg to the result
3316            register first, because actually doing the SSE scalar insn
3317            leaves the upper 3/4 of the destination register
3318            unchanged.  Whereas the required semantics of these
3319            primops is that the upper 3/4 is simply copied in from the
3320            argument. */
3321         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3322         HReg dst = newVRegV(env);
3323         addInstr(env, mk_vMOVsd_RR(arg, dst));
3324         addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3325         return dst;
3326      }
3327
3328      case Iop_Recip64F0x2: op = Xsse_RCPF;   goto do_64F0x2_unary;
3329      case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
3330      case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
3331      do_64F0x2_unary:
3332      {
3333         /* A bit subtle.  We have to copy the arg to the result
3334            register first, because actually doing the SSE scalar insn
3335            leaves the upper half of the destination register
3336            unchanged.  Whereas the required semantics of these
3337            primops is that the upper half is simply copied in from the
3338            argument. */
3339         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3340         HReg dst = newVRegV(env);
3341         REQUIRE_SSE2;
3342         addInstr(env, mk_vMOVsd_RR(arg, dst));
3343         addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3344         return dst;
3345      }
3346
3347      case Iop_32UtoV128: {
3348         HReg      dst  = newVRegV(env);
3349         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3350         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3351         addInstr(env, X86Instr_Push(rmi));
3352	 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3353         add_to_esp(env, 4);
3354         return dst;
3355      }
3356
3357      case Iop_64UtoV128: {
3358         HReg      rHi, rLo;
3359         HReg      dst  = newVRegV(env);
3360         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3361         iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3362         addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3363         addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3364	 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3365         add_to_esp(env, 8);
3366         return dst;
3367      }
3368
3369      default:
3370         break;
3371   } /* switch (e->Iex.Unop.op) */
3372   } /* if (e->tag == Iex_Unop) */
3373
3374   if (e->tag == Iex_Binop) {
3375   switch (e->Iex.Binop.op) {
3376
3377      case Iop_SetV128lo32: {
3378         HReg dst = newVRegV(env);
3379         HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3380         HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3381         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3382         sub_from_esp(env, 16);
3383         addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3384         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3385         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3386         add_to_esp(env, 16);
3387         return dst;
3388      }
3389
3390      case Iop_SetV128lo64: {
3391         HReg dst = newVRegV(env);
3392         HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3393         HReg srcIhi, srcIlo;
3394         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3395         X86AMode* esp4 = advance4(esp0);
3396         iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3397         sub_from_esp(env, 16);
3398         addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3399         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3400         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3401         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3402         add_to_esp(env, 16);
3403         return dst;
3404      }
3405
3406      case Iop_64HLtoV128: {
3407         HReg r3, r2, r1, r0;
3408         X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
3409         X86AMode* esp4  = advance4(esp0);
3410         X86AMode* esp8  = advance4(esp4);
3411         X86AMode* esp12 = advance4(esp8);
3412         HReg dst = newVRegV(env);
3413	 /* do this via the stack (easy, convenient, etc) */
3414         sub_from_esp(env, 16);
3415         /* Do the less significant 64 bits */
3416         iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3417         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3418         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3419         /* Do the more significant 64 bits */
3420         iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3421         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3422         addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3423	 /* Fetch result back from stack. */
3424         addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3425         add_to_esp(env, 16);
3426         return dst;
3427      }
3428
3429      case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3430      case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3431      case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3432      case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3433      case Iop_Add32Fx4:   op = Xsse_ADDF;   goto do_32Fx4;
3434      case Iop_Div32Fx4:   op = Xsse_DIVF;   goto do_32Fx4;
3435      case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
3436      case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
3437      case Iop_Mul32Fx4:   op = Xsse_MULF;   goto do_32Fx4;
3438      case Iop_Sub32Fx4:   op = Xsse_SUBF;   goto do_32Fx4;
3439      do_32Fx4:
3440      {
3441         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3442         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3443         HReg dst = newVRegV(env);
3444         addInstr(env, mk_vMOVsd_RR(argL, dst));
3445         addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3446         return dst;
3447      }
3448
3449      case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3450      case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3451      case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3452      case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3453      case Iop_Add64Fx2:   op = Xsse_ADDF;   goto do_64Fx2;
3454      case Iop_Div64Fx2:   op = Xsse_DIVF;   goto do_64Fx2;
3455      case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
3456      case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
3457      case Iop_Mul64Fx2:   op = Xsse_MULF;   goto do_64Fx2;
3458      case Iop_Sub64Fx2:   op = Xsse_SUBF;   goto do_64Fx2;
3459      do_64Fx2:
3460      {
3461         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3462         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3463         HReg dst = newVRegV(env);
3464         REQUIRE_SSE2;
3465         addInstr(env, mk_vMOVsd_RR(argL, dst));
3466         addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3467         return dst;
3468      }
3469
3470      case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3471      case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3472      case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3473      case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3474      case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
3475      case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
3476      case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
3477      case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
3478      case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
3479      case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
3480      do_32F0x4: {
3481         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3482         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3483         HReg dst = newVRegV(env);
3484         addInstr(env, mk_vMOVsd_RR(argL, dst));
3485         addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3486         return dst;
3487      }
3488
3489      case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3490      case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3491      case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3492      case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3493      case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
3494      case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
3495      case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
3496      case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
3497      case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
3498      case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
3499      do_64F0x2: {
3500         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3501         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3502         HReg dst = newVRegV(env);
3503         REQUIRE_SSE2;
3504         addInstr(env, mk_vMOVsd_RR(argL, dst));
3505         addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3506         return dst;
3507      }
3508
3509      case Iop_QNarrowBin32Sto16Sx8:
3510         op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3511      case Iop_QNarrowBin16Sto8Sx16:
3512         op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3513      case Iop_QNarrowBin16Sto8Ux16:
3514         op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3515
3516      case Iop_InterleaveHI8x16:
3517         op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3518      case Iop_InterleaveHI16x8:
3519         op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3520      case Iop_InterleaveHI32x4:
3521         op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3522      case Iop_InterleaveHI64x2:
3523         op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3524
3525      case Iop_InterleaveLO8x16:
3526         op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3527      case Iop_InterleaveLO16x8:
3528         op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3529      case Iop_InterleaveLO32x4:
3530         op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3531      case Iop_InterleaveLO64x2:
3532         op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3533
3534      case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
3535      case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
3536      case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
3537      case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
3538      case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
3539      case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
3540      case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
3541      case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
3542      case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
3543      case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
3544      case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
3545      case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
3546      case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
3547      case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
3548      case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
3549      case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
3550      case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
3551      case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3552      case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3553      case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
3554      case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
3555      case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
3556      case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
3557      case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3558      case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3559      case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
3560      case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
3561      case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
3562      case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
3563      case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
3564      case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
3565      case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
3566      case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
3567      case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
3568      do_SseReRg: {
3569         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3570         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3571         HReg dst = newVRegV(env);
3572         if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3573            REQUIRE_SSE2;
3574         if (arg1isEReg) {
3575            addInstr(env, mk_vMOVsd_RR(arg2, dst));
3576            addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3577         } else {
3578            addInstr(env, mk_vMOVsd_RR(arg1, dst));
3579            addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3580         }
3581         return dst;
3582      }
3583
3584      case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3585      case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3586      case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3587      case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3588      case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3589      case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3590      case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3591      case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3592      do_SseShift: {
3593         HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
3594         X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3595         X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3596         HReg      ereg = newVRegV(env);
3597         HReg      dst  = newVRegV(env);
3598         REQUIRE_SSE2;
3599         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3600         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3601         addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3602         addInstr(env, X86Instr_Push(rmi));
3603         addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3604	 addInstr(env, mk_vMOVsd_RR(greg, dst));
3605         addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3606         add_to_esp(env, 16);
3607         return dst;
3608      }
3609
3610      case Iop_NarrowBin32to16x8:
3611         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3612         goto do_SseAssistedBinary;
3613      case Iop_NarrowBin16to8x16:
3614         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3615         goto do_SseAssistedBinary;
3616      do_SseAssistedBinary: {
3617         /* As with the amd64 case (where this is copied from) we
3618            generate pretty bad code. */
3619         vassert(fn != 0);
3620         HReg dst = newVRegV(env);
3621         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3622         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3623         HReg argp = newVRegI(env);
3624         /* subl $112, %esp         -- make a space */
3625         sub_from_esp(env, 112);
3626         /* leal 48(%esp), %r_argp  -- point into it */
3627         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3628                                      argp));
3629         /* andl $-16, %r_argp      -- 16-align the pointer */
3630         addInstr(env, X86Instr_Alu32R(Xalu_AND,
3631                                       X86RMI_Imm( ~(UInt)15 ),
3632                                       argp));
3633         /* Prepare 3 arg regs:
3634            leal  0(%r_argp), %eax
3635            leal 16(%r_argp), %edx
3636            leal 32(%r_argp), %ecx
3637         */
3638         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3639                                      hregX86_EAX()));
3640         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3641                                      hregX86_EDX()));
3642         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3643                                      hregX86_ECX()));
3644         /* Store the two args, at (%edx) and (%ecx):
3645            movupd  %argL, 0(%edx)
3646            movupd  %argR, 0(%ecx)
3647         */
3648         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3649                                        X86AMode_IR(0, hregX86_EDX())));
3650         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3651                                        X86AMode_IR(0, hregX86_ECX())));
3652         /* call the helper */
3653         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
3654         /* fetch the result from memory, using %r_argp, which the
3655            register allocator will keep alive across the call. */
3656         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3657                                        X86AMode_IR(0, argp)));
3658         /* and finally, clear the space */
3659         add_to_esp(env, 112);
3660         return dst;
3661      }
3662
3663      default:
3664         break;
3665   } /* switch (e->Iex.Binop.op) */
3666   } /* if (e->tag == Iex_Binop) */
3667
3668   if (e->tag == Iex_Mux0X) {
3669      X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
3670      HReg rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
3671      HReg r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
3672      HReg dst = newVRegV(env);
3673      addInstr(env, mk_vMOVsd_RR(rX,dst));
3674      addInstr(env, X86Instr_Test32(0xFF, r8));
3675      addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
3676      return dst;
3677   }
3678
3679   vec_fail:
3680   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3681              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3682   ppIRExpr(e);
3683   vpanic("iselVecExpr_wrk");
3684
3685#  undef REQUIRE_SSE1
3686#  undef REQUIRE_SSE2
3687#  undef SSE2_OR_ABOVE
3688}
3689
3690
3691/*---------------------------------------------------------*/
3692/*--- ISEL: Statements                                  ---*/
3693/*---------------------------------------------------------*/
3694
3695static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3696{
3697   if (vex_traceflags & VEX_TRACE_VCODE) {
3698      vex_printf("\n-- ");
3699      ppIRStmt(stmt);
3700      vex_printf("\n");
3701   }
3702
3703   switch (stmt->tag) {
3704
3705   /* --------- STORE --------- */
3706   case Ist_Store: {
3707      IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3708      IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3709      IREndness end   = stmt->Ist.Store.end;
3710
3711      if (tya != Ity_I32 || end != Iend_LE)
3712         goto stmt_fail;
3713
3714      if (tyd == Ity_I32) {
3715         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3716         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3717         addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3718         return;
3719      }
3720      if (tyd == Ity_I8 || tyd == Ity_I16) {
3721         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3722         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3723         addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3724                                       r,am ));
3725         return;
3726      }
3727      if (tyd == Ity_F64) {
3728         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3729         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3730         addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3731         return;
3732      }
3733      if (tyd == Ity_F32) {
3734         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3735         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3736         addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3737         return;
3738      }
3739      if (tyd == Ity_I64) {
3740         HReg vHi, vLo, rA;
3741         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3742         rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3743         addInstr(env, X86Instr_Alu32M(
3744                          Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3745         addInstr(env, X86Instr_Alu32M(
3746                          Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3747         return;
3748      }
3749      if (tyd == Ity_V128) {
3750         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3751         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3752         addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3753         return;
3754      }
3755      break;
3756   }
3757
3758   /* --------- PUT --------- */
3759   case Ist_Put: {
3760      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3761      if (ty == Ity_I32) {
3762         /* We're going to write to memory, so compute the RHS into an
3763            X86RI. */
3764         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3765         addInstr(env,
3766                  X86Instr_Alu32M(
3767                     Xalu_MOV,
3768                     ri,
3769                     X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3770                 ));
3771         return;
3772      }
3773      if (ty == Ity_I8 || ty == Ity_I16) {
3774         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3775         addInstr(env, X86Instr_Store(
3776                          toUChar(ty==Ity_I8 ? 1 : 2),
3777                          r,
3778                          X86AMode_IR(stmt->Ist.Put.offset,
3779                                      hregX86_EBP())));
3780         return;
3781      }
3782      if (ty == Ity_I64) {
3783         HReg vHi, vLo;
3784         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3785         X86AMode* am4 = advance4(am);
3786         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
3787         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
3788         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
3789         return;
3790      }
3791      if (ty == Ity_V128) {
3792         HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
3793         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3794         addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
3795         return;
3796      }
3797      if (ty == Ity_F32) {
3798         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3799         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3800         set_FPU_rounding_default(env); /* paranoia */
3801         addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
3802         return;
3803      }
3804      if (ty == Ity_F64) {
3805         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3806         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3807         set_FPU_rounding_default(env); /* paranoia */
3808         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
3809         return;
3810      }
3811      break;
3812   }
3813
3814   /* --------- Indexed PUT --------- */
3815   case Ist_PutI: {
3816      X86AMode* am
3817         = genGuestArrayOffset(
3818              env, stmt->Ist.PutI.descr,
3819                   stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
3820
3821      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3822      if (ty == Ity_F64) {
3823         HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
3824         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
3825         return;
3826      }
3827      if (ty == Ity_I8) {
3828         HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3829         addInstr(env, X86Instr_Store( 1, r, am ));
3830         return;
3831      }
3832      if (ty == Ity_I32) {
3833         HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3834         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
3835         return;
3836      }
3837      if (ty == Ity_I64) {
3838         HReg rHi, rLo;
3839         X86AMode* am4 = advance4(am);
3840         iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data);
3841         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
3842         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
3843         return;
3844      }
3845      break;
3846   }
3847
3848   /* --------- TMP --------- */
3849   case Ist_WrTmp: {
3850      IRTemp tmp = stmt->Ist.WrTmp.tmp;
3851      IRType ty = typeOfIRTemp(env->type_env, tmp);
3852
3853      /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
3854         compute it into an AMode and then use LEA.  This usually
3855         produces fewer instructions, often because (for memcheck
3856         created IR) we get t = address-expression, (t is later used
3857         twice) and so doing this naturally turns address-expression
3858         back into an X86 amode. */
3859      if (ty == Ity_I32
3860          && stmt->Ist.WrTmp.data->tag == Iex_Binop
3861          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
3862         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3863         HReg dst = lookupIRTemp(env, tmp);
3864         if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
3865            /* Hmm, iselIntExpr_AMode wimped out and just computed the
3866               value into a register.  Just emit a normal reg-reg move
3867               so reg-alloc can coalesce it away in the usual way. */
3868            HReg src = am->Xam.IR.reg;
3869            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
3870         } else {
3871            addInstr(env, X86Instr_Lea32(am,dst));
3872         }
3873         return;
3874      }
3875
3876      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3877         X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
3878         HReg dst = lookupIRTemp(env, tmp);
3879         addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
3880         return;
3881      }
3882      if (ty == Ity_I64) {
3883         HReg rHi, rLo, dstHi, dstLo;
3884         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
3885         lookupIRTemp64( &dstHi, &dstLo, env, tmp);
3886         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3887         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3888         return;
3889      }
3890      if (ty == Ity_I1) {
3891         X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
3892         HReg dst = lookupIRTemp(env, tmp);
3893         addInstr(env, X86Instr_Set32(cond, dst));
3894         return;
3895      }
3896      if (ty == Ity_F64) {
3897         HReg dst = lookupIRTemp(env, tmp);
3898         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3899         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3900         return;
3901      }
3902      if (ty == Ity_F32) {
3903         HReg dst = lookupIRTemp(env, tmp);
3904         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3905         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3906         return;
3907      }
3908      if (ty == Ity_V128) {
3909         HReg dst = lookupIRTemp(env, tmp);
3910         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
3911         addInstr(env, mk_vMOVsd_RR(src,dst));
3912         return;
3913      }
3914      break;
3915   }
3916
3917   /* --------- Call to DIRTY helper --------- */
3918   case Ist_Dirty: {
3919      IRType   retty;
3920      IRDirty* d = stmt->Ist.Dirty.details;
3921      Bool     passBBP = False;
3922
3923      if (d->nFxState == 0)
3924         vassert(!d->needsBBP);
3925
3926      passBBP = toBool(d->nFxState > 0 && d->needsBBP);
3927
3928      /* Marshal args, do the call, clear stack. */
3929      doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3930
3931      /* Now figure out what to do with the returned value, if any. */
3932      if (d->tmp == IRTemp_INVALID)
3933         /* No return value.  Nothing to do. */
3934         return;
3935
3936      retty = typeOfIRTemp(env->type_env, d->tmp);
3937      if (retty == Ity_I64) {
3938         HReg dstHi, dstLo;
3939         /* The returned value is in %edx:%eax.  Park it in the
3940            register-pair associated with tmp. */
3941         lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
3942         addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
3943         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
3944         return;
3945      }
3946      if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
3947         /* The returned value is in %eax.  Park it in the register
3948            associated with tmp. */
3949         HReg dst = lookupIRTemp(env, d->tmp);
3950         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
3951         return;
3952      }
3953      break;
3954   }
3955
3956   /* --------- MEM FENCE --------- */
3957   case Ist_MBE:
3958      switch (stmt->Ist.MBE.event) {
3959         case Imbe_Fence:
3960            addInstr(env, X86Instr_MFence(env->hwcaps));
3961            return;
3962         default:
3963            break;
3964      }
3965      break;
3966
3967   /* --------- ACAS --------- */
3968   case Ist_CAS:
3969      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3970         /* "normal" singleton CAS */
3971         UChar  sz;
3972         IRCAS* cas = stmt->Ist.CAS.details;
3973         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
3974         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
3975         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
3976         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
3977         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
3978         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
3979         vassert(cas->expdHi == NULL);
3980         vassert(cas->dataHi == NULL);
3981         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
3982         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
3983         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
3984         switch (ty) {
3985            case Ity_I32: sz = 4; break;
3986            case Ity_I16: sz = 2; break;
3987            case Ity_I8:  sz = 1; break;
3988            default: goto unhandled_cas;
3989         }
3990         addInstr(env, X86Instr_ACAS(am, sz));
3991         addInstr(env,
3992                  X86Instr_CMov32(Xcc_NZ,
3993                                  X86RM_Reg(hregX86_EAX()), rOldLo));
3994         return;
3995      } else {
3996         /* double CAS */
3997         IRCAS* cas = stmt->Ist.CAS.details;
3998         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
3999         /* only 32-bit allowed in this case */
4000         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4001         /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4002         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4003         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4004         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4005         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4006         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4007         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
4008         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
4009         if (ty != Ity_I32)
4010            goto unhandled_cas;
4011         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4012         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4013         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4014         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4015         addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4016         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4017         addInstr(env, X86Instr_DACAS(am));
4018         addInstr(env,
4019                  X86Instr_CMov32(Xcc_NZ,
4020                                  X86RM_Reg(hregX86_EDX()), rOldHi));
4021         addInstr(env,
4022                  X86Instr_CMov32(Xcc_NZ,
4023                                  X86RM_Reg(hregX86_EAX()), rOldLo));
4024         return;
4025      }
4026      unhandled_cas:
4027      break;
4028
4029   /* --------- INSTR MARK --------- */
4030   /* Doesn't generate any executable code ... */
4031   case Ist_IMark:
4032       return;
4033
4034   /* --------- NO-OP --------- */
4035   /* Fairly self-explanatory, wouldn't you say? */
4036   case Ist_NoOp:
4037       return;
4038
4039   /* --------- EXIT --------- */
4040   case Ist_Exit: {
4041      X86RI*      dst;
4042      X86CondCode cc;
4043      if (stmt->Ist.Exit.dst->tag != Ico_U32)
4044         vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
4045      dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
4046      cc  = iselCondCode(env,stmt->Ist.Exit.guard);
4047      addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
4048      return;
4049   }
4050
4051   default: break;
4052   }
4053  stmt_fail:
4054   ppIRStmt(stmt);
4055   vpanic("iselStmt");
4056}
4057
4058
4059/*---------------------------------------------------------*/
4060/*--- ISEL: Basic block terminators (Nexts)             ---*/
4061/*---------------------------------------------------------*/
4062
4063static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
4064{
4065   X86RI* ri;
4066   if (vex_traceflags & VEX_TRACE_VCODE) {
4067      vex_printf("\n-- goto {");
4068      ppIRJumpKind(jk);
4069      vex_printf("} ");
4070      ppIRExpr(next);
4071      vex_printf("\n");
4072   }
4073   ri = iselIntExpr_RI(env, next);
4074   addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
4075}
4076
4077
4078/*---------------------------------------------------------*/
4079/*--- Insn selector top-level                           ---*/
4080/*---------------------------------------------------------*/
4081
4082/* Translate an entire SB to x86 code. */
4083
4084HInstrArray* iselSB_X86 ( IRSB* bb, VexArch      arch_host,
4085                                    VexArchInfo* archinfo_host,
4086                                    VexAbiInfo*  vbi/*UNUSED*/ )
4087{
4088   Int      i, j;
4089   HReg     hreg, hregHI;
4090   ISelEnv* env;
4091   UInt     hwcaps_host = archinfo_host->hwcaps;
4092
4093   /* sanity ... */
4094   vassert(arch_host == VexArchX86);
4095   vassert(0 == (hwcaps_host
4096                 & ~(VEX_HWCAPS_X86_SSE1
4097                     | VEX_HWCAPS_X86_SSE2
4098                     | VEX_HWCAPS_X86_SSE3
4099                     | VEX_HWCAPS_X86_LZCNT)));
4100
4101   /* Make up an initial environment to use. */
4102   env = LibVEX_Alloc(sizeof(ISelEnv));
4103   env->vreg_ctr = 0;
4104
4105   /* Set up output code array. */
4106   env->code = newHInstrArray();
4107
4108   /* Copy BB's type env. */
4109   env->type_env = bb->tyenv;
4110
4111   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
4112      change as we go along. */
4113   env->n_vregmap = bb->tyenv->types_used;
4114   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4115   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4116
4117   /* and finally ... */
4118   env->hwcaps = hwcaps_host;
4119
4120   /* For each IR temporary, allocate a suitably-kinded virtual
4121      register. */
4122   j = 0;
4123   for (i = 0; i < env->n_vregmap; i++) {
4124      hregHI = hreg = INVALID_HREG;
4125      switch (bb->tyenv->types[i]) {
4126         case Ity_I1:
4127         case Ity_I8:
4128         case Ity_I16:
4129         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
4130         case Ity_I64:  hreg   = mkHReg(j++, HRcInt32, True);
4131                        hregHI = mkHReg(j++, HRcInt32, True); break;
4132         case Ity_F32:
4133         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
4134         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
4135         default: ppIRType(bb->tyenv->types[i]);
4136                  vpanic("iselBB: IRTemp type");
4137      }
4138      env->vregmap[i]   = hreg;
4139      env->vregmapHI[i] = hregHI;
4140   }
4141   env->vreg_ctr = j;
4142
4143   /* Ok, finally we can iterate over the statements. */
4144   for (i = 0; i < bb->stmts_used; i++)
4145      iselStmt(env,bb->stmts[i]);
4146
4147   iselNext(env,bb->next,bb->jumpkind);
4148
4149   /* record the number of vregs we used. */
4150   env->code->n_vregs = env->vreg_ctr;
4151   return env->code;
4152}
4153
4154
4155/*---------------------------------------------------------------*/
4156/*--- end                                     host_x86_isel.c ---*/
4157/*---------------------------------------------------------------*/
4158