1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_ppc_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39
40#include "ir_match.h"
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"
45#include "host_ppc_defs.h"
46
/* GPR register class for ppc32/64: HRcInt64 when |__mode64| is
   nonzero, HRcInt32 otherwise.  The argument is parenthesised so that
   an argument containing a low-precedence operator (for example a
   nested ?: expression) is evaluated as a whole before the class is
   selected. */
#define HRcGPR(__mode64) ((__mode64) ? HRcInt64 : HRcInt32)
49
50
51/*---------------------------------------------------------*/
52/*--- Register Usage Conventions                        ---*/
53/*---------------------------------------------------------*/
54/*
55  Integer Regs
56  ------------
57  GPR0       Reserved
58  GPR1       Stack Pointer
59  GPR2       not used - TOC pointer
60  GPR3:10    Allocateable
61  GPR11      if mode64: not used - calls by ptr / env ptr for some langs
62  GPR12      if mode64: not used - exceptions / global linkage code
63  GPR13      not used - Thread-specific pointer
64  GPR14:28   Allocateable
65  GPR29      Unused by us (reserved for the dispatcher)
66  GPR30      AltiVec temp spill register
67  GPR31      GuestStatePointer
68
69  Of Allocateable regs:
70  if (mode64)
71    GPR3:10  Caller-saved regs
72  else
73    GPR3:12  Caller-saved regs
74  GPR14:29   Callee-saved regs
75
76  GPR3       [Return | Parameter] - carrying reg
77  GPR4:10    Parameter-carrying regs
78
79
80  Floating Point Regs
81  -------------------
82  FPR0:31    Allocateable
83
84  FPR0       Caller-saved - scratch reg
85  if (mode64)
86    FPR1:13  Caller-saved - param & return regs
87  else
88    FPR1:8   Caller-saved - param & return regs
89    FPR9:13  Caller-saved regs
90  FPR14:31   Callee-saved regs
91
92
93  Vector Regs (on processors with the VMX feature)
94  -----------
95  VR0-VR1    Volatile scratch registers
96  VR2-VR13   Volatile vector parameters registers
97  VR14-VR19  Volatile scratch registers
98  VR20-VR31  Non-volatile registers
99  VRSAVE     Non-volatile 32-bit register
100*/
101
102
103/*---------------------------------------------------------*/
104/*--- PPC FP Status & Control Register Conventions      ---*/
105/*---------------------------------------------------------*/
106/*
107  Vex-generated code expects to run with the FPU set as follows: all
108  exceptions masked.  The rounding mode is set appropriately before
109  each floating point insn emitted (or left unchanged if known to be
110  correct already).  There are a few fp insns (fmr,fneg,fabs,fnabs),
111  which are unaffected by the rm and so the rounding mode is not set
112  prior to them.
113
114  At least on MPC7447A (Mac Mini), frsqrte is also not affected by
115  rounding mode.  At some point the ppc docs get sufficiently vague
116  that the only way to find out is to write test programs.
117*/
118/* Notes on the FP instruction set, 6 Feb 06.
119
120What                 exns -> CR1 ?   Sets FPRF ?   Observes RM ?
121-------------------------------------------------------------
122
123fmr[.]                   if .             n             n
124fneg[.]                  if .             n             n
125fabs[.]                  if .             n             n
126fnabs[.]                 if .             n             n
127
128fadd[.]                  if .             y             y
129fadds[.]                 if .             y             y
130fcfid[.] (Si64->dbl)     if .             y             y
131fcfidU[.] (Ui64->dbl)    if .             y             y
132fcfids[.] (Si64->sngl)   if .             Y             Y
133fcfidus[.] (Ui64->sngl)  if .             Y             Y
134fcmpo (cmp, result       n                n             n
135fcmpu  to crfD)          n                n             n
136fctid[.]  (dbl->i64)     if .       ->undef             y
137fctidz[.] (dbl->i64)     if .       ->undef    rounds-to-zero
138fctiw[.]  (dbl->i32)     if .       ->undef             y
139fctiwz[.] (dbl->i32)     if .       ->undef    rounds-to-zero
140fdiv[.]                  if .             y             y
141fdivs[.]                 if .             y             y
142fmadd[.]                 if .             y             y
143fmadds[.]                if .             y             y
144fmsub[.]                 if .             y             y
145fmsubs[.]                if .             y             y
146fmul[.]                  if .             y             y
147fmuls[.]                 if .             y             y
148
149(note: for fnm*, rounding happens before final negation)
150fnmadd[.]                if .             y             y
151fnmadds[.]               if .             y             y
152fnmsub[.]                if .             y             y
153fnmsubs[.]               if .             y             y
154
155fre[.]                   if .             y             y
156fres[.]                  if .             y             y
157
158frsqrte[.]               if .             y       apparently not
159
160fsqrt[.]                 if .             y             y
161fsqrts[.]                if .             y             y
162fsub[.]                  if .             y             y
163fsubs[.]                 if .             y             y
164
165
166fpscr: bits 30-31 (ibm) is RM
167            24-29 (ibm) are exnmasks/non-IEEE bit, all zero
168	    15-19 (ibm) is FPRF: class, <, =, >, UNord
169
170ppc fe(guest) makes fpscr read as all zeros except RM (and maybe FPRF
171in future)
172
173mcrfs     - move fpscr field to CR field
174mtfsfi[.] - 4 bit imm moved to fpscr field
175mtfsf[.]  - move frS[low 1/2] to fpscr but using 8-bit field mask
176mtfsb1[.] - set given fpscr bit
177mtfsb0[.] - clear given fpscr bit
178mffs[.]   - move all fpscr to frD[low 1/2]
179
180For [.] presumably cr1 is set with exn summary bits, as per
181main FP insns
182
183A single precision store truncates/denormalises the in-register value,
184but does not round it.  This is so that flds followed by fsts is
185always the identity.
186*/
187
188
189/*---------------------------------------------------------*/
190/*--- misc helpers                                      ---*/
191/*---------------------------------------------------------*/
192
193/* These are duplicated in guest-ppc/toIR.c */
194static IRExpr* unop ( IROp op, IRExpr* a )
195{
196   return IRExpr_Unop(op, a);
197}
198
199static IRExpr* mkU32 ( UInt i )
200{
201   return IRExpr_Const(IRConst_U32(i));
202}
203
204static IRExpr* bind ( Int binder )
205{
206   return IRExpr_Binder(binder);
207}
208
209static Bool isZeroU8 ( IRExpr* e )
210{
211   return e->tag == Iex_Const
212          && e->Iex.Const.con->tag == Ico_U8
213          && e->Iex.Const.con->Ico.U8 == 0;
214}
215
216
217/*---------------------------------------------------------*/
218/*--- ISelEnv                                           ---*/
219/*---------------------------------------------------------*/
220
221/* This carries around:
222
223   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
224     might encounter.  This is computed before insn selection starts,
225     and does not change.
226
227   - A mapping from IRTemp to HReg.  This tells the insn selector
228     which virtual register(s) are associated with each IRTemp
229     temporary.  This is computed before insn selection starts, and
230     does not change.  We expect this mapping to map precisely the
231     same set of IRTemps as the type mapping does.
232
233         - vregmapLo    holds the primary register for the IRTemp.
234         - vregmapMedLo holds the secondary register for the IRTemp,
235              if any is needed.  That's only for Ity_I64 temps
236              in 32 bit mode or Ity_I128 temps in 64-bit mode.
237         - vregmapMedHi is only for dealing with Ity_I128 temps in
238              32 bit mode.  It holds bits 95:64 (Intel numbering)
239              of the IRTemp.
240         - vregmapHi is also only for dealing with Ity_I128 temps
241              in 32 bit mode.  It holds the most significant bits
242              (127:96 in Intel numbering) of the IRTemp.
243
244    - The code array, that is, the insns selected so far.
245
246    - A counter, for generating new virtual registers.
247
248    - The host subarchitecture we are selecting insns for.
249      This is set at the start and does not change.
250
251    - A Bool to tell us if the host is 32 or 64bit.
252      This is set at the start and does not change.
253
254    - An IRExpr*, which may be NULL, holding the IR expression (an
255      IRRoundingMode-encoded value) to which the FPU's rounding mode
256      was most recently set.  Setting to NULL is always safe.  Used to
257      avoid redundant settings of the FPU's rounding mode, as
258      described in set_FPU_rounding_mode below.
259
260    - A VexMiscInfo*, needed for knowing how to generate
261      function calls for this target.
262
263    - The maximum guest address of any guest insn in this block.
264      Actually, the address of the highest-addressed byte from any
265      insn in this block.  Is set at the start and does not change.
266      This is used for detecting jumps which are definitely
267      forward-edges from this block, and therefore can be made
268      (chained) to the fast entry point of the destination, thereby
269      avoiding the destination's event check.
270*/
271
272typedef
273   struct {
274      /* Constant -- are set at the start and do not change. */
275      IRTypeEnv* type_env;
276                              //    64-bit mode              32-bit mode
277      HReg*    vregmapLo;     // Low 64-bits [63:0]    Low 32-bits     [31:0]
278      HReg*    vregmapMedLo;  // high 64-bits[127:64]  Next 32-bits    [63:32]
279      HReg*    vregmapMedHi;  // unused                Next 32-bits    [95:64]
280      HReg*    vregmapHi;     // unused                highest 32-bits [127:96]
281      Int      n_vregmap;
282
283      /* 27 Jan 06: Not currently used, but should be */
284      UInt         hwcaps;
285
286      Bool         mode64;
287
288      VexAbiInfo*  vbi;
289
290      Bool         chainingAllowed;
291      Addr64       max_ga;
292
293      /* These are modified as we go along. */
294      HInstrArray* code;
295      Int          vreg_ctr;
296
297      IRExpr*      previous_rm;
298   }
299   ISelEnv;
300
301
302static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
303{
304   vassert(tmp >= 0);
305   vassert(tmp < env->n_vregmap);
306   return env->vregmapLo[tmp];
307}
308
309static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
310                               ISelEnv* env, IRTemp tmp )
311{
312   vassert(tmp >= 0);
313   vassert(tmp < env->n_vregmap);
314   vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
315   *vrLO = env->vregmapLo[tmp];
316   *vrHI = env->vregmapMedLo[tmp];
317}
318
319/* Only for used in 32-bit mode */
320static void lookupIRTempQuad ( HReg* vrHi, HReg* vrMedHi, HReg* vrMedLo,
321                               HReg* vrLo, ISelEnv* env, IRTemp tmp )
322{
323   vassert(!env->mode64);
324   vassert(tmp >= 0);
325   vassert(tmp < env->n_vregmap);
326   vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
327   *vrHi    = env->vregmapHi[tmp];
328   *vrMedHi = env->vregmapMedHi[tmp];
329   *vrMedLo = env->vregmapMedLo[tmp];
330   *vrLo    = env->vregmapLo[tmp];
331}
332
333static void addInstr ( ISelEnv* env, PPCInstr* instr )
334{
335   addHInstr(env->code, instr);
336   if (vex_traceflags & VEX_TRACE_VCODE) {
337      ppPPCInstr(instr, env->mode64);
338      vex_printf("\n");
339   }
340}
341
342static HReg newVRegI ( ISelEnv* env )
343{
344   HReg reg = mkHReg(env->vreg_ctr, HRcGPR(env->mode64),
345                     True/*virtual reg*/);
346   env->vreg_ctr++;
347   return reg;
348}
349
350static HReg newVRegF ( ISelEnv* env )
351{
352   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
353   env->vreg_ctr++;
354   return reg;
355}
356
357static HReg newVRegV ( ISelEnv* env )
358{
359   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
360   env->vreg_ctr++;
361   return reg;
362}
363
364
365/*---------------------------------------------------------*/
366/*--- ISEL: Forward declarations                        ---*/
367/*---------------------------------------------------------*/
368
369/* These are organised as iselXXX and iselXXX_wrk pairs.  The
370   iselXXX_wrk do the real work, but are not to be called directly.
371   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
372   checks that all returned registers are virtual.  You should not
373   call the _wrk version directly.
374
375   'Word' refers to the size of the native machine word, that is,
376   32-bit int in 32-bit mode and 64-bit int in 64-bit mode.  '2Word'
377   therefore refers to a double-width (64/128-bit) quantity in two
378   integer registers.
379*/
380/* 32-bit mode: compute an I8/I16/I32 into a GPR.
381   64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
382static HReg          iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e );
383static HReg          iselWordExpr_R     ( ISelEnv* env, IRExpr* e );
384
385/* 32-bit mode: Compute an I8/I16/I32 into a RH
386                (reg-or-halfword-immediate).
387   64-bit mode: Compute an I8/I16/I32/I64 into a RH
388                (reg-or-halfword-immediate).
   It's important to specify whether the immediate is to be regarded
   as signed or not.  If yes, this will never return -32768 as an
   immediate; this guarantees that all signed immediates that are
   returned can have their sign inverted if need be.
393*/
394static PPCRH*        iselWordExpr_RH_wrk ( ISelEnv* env,
395                                           Bool syned, IRExpr* e );
396static PPCRH*        iselWordExpr_RH     ( ISelEnv* env,
397                                           Bool syned, IRExpr* e );
398
399/* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
400   64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
401static PPCRI*        iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
402static PPCRI*        iselWordExpr_RI     ( ISelEnv* env, IRExpr* e );
403
404/* In 32 bit mode ONLY, compute an I8 into a
405   reg-or-5-bit-unsigned-immediate, the latter being an immediate in
406   the range 1 .. 31 inclusive.  Used for doing shift amounts. */
407static PPCRH*        iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e );
408static PPCRH*        iselWordExpr_RH5u     ( ISelEnv* env, IRExpr* e );
409
410/* In 64-bit mode ONLY, compute an I8 into a
411   reg-or-6-bit-unsigned-immediate, the latter being an immediate in
412   the range 1 .. 63 inclusive.  Used for doing shift amounts. */
413static PPCRH*        iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e );
414static PPCRH*        iselWordExpr_RH6u     ( ISelEnv* env, IRExpr* e );
415
416/* 32-bit mode: compute an I32 into an AMode.
417   64-bit mode: compute an I64 into an AMode.
418
419   Requires to know (xferTy) the type of data to be loaded/stored
420   using this amode.  That is so that, for 64-bit code generation, any
421   PPCAMode_IR returned will have an index (immediate offset) field
422   that is guaranteed to be 4-aligned, if there is any chance that the
423   amode is to be used in ld/ldu/lda/std/stdu.
424
425   Since there are no such restrictions on 32-bit insns, xferTy is
426   ignored for 32-bit code generation. */
427static PPCAMode*     iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType xferTy );
428static PPCAMode*     iselWordExpr_AMode     ( ISelEnv* env, IRExpr* e, IRType xferTy );
429
430static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
431                                         HReg* rMedLo, HReg* rLo,
432                                         ISelEnv* env, IRExpr* e );
433static void iselInt128Expr_to_32x4     ( HReg* rHi, HReg* rMedHi,
434                                         HReg* rMedLo, HReg* rLo,
435                                         ISelEnv* env, IRExpr* e );
436
437
438/* 32-bit mode ONLY: compute an I64 into a GPR pair. */
439static void          iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
440                                         ISelEnv* env, IRExpr* e );
441static void          iselInt64Expr     ( HReg* rHi, HReg* rLo,
442                                         ISelEnv* env, IRExpr* e );
443
444/* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
445static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
446                                          ISelEnv* env, IRExpr* e );
447static void          iselInt128Expr     ( HReg* rHi, HReg* rLo,
448                                          ISelEnv* env, IRExpr* e );
449
450static PPCCondCode   iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
451static PPCCondCode   iselCondCode     ( ISelEnv* env, IRExpr* e );
452
453static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
454static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e );
455
456static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
457static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e );
458
459static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
460static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e );
461
462/* 64-bit mode ONLY. */
463static HReg          iselDfp32Expr_wrk ( ISelEnv* env, IRExpr* e );
464static HReg          iselDfp32Expr     ( ISelEnv* env, IRExpr* e );
465static HReg          iselDfp64Expr_wrk ( ISelEnv* env, IRExpr* e );
466static HReg          iselDfp64Expr     ( ISelEnv* env, IRExpr* e );
467
/* 64-bit mode ONLY: compute a D128 into a GPR64 pair. */
469static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
470                                 IRExpr* e );
471static void iselDfp128Expr     ( HReg* rHi, HReg* rLo, ISelEnv* env,
472                                 IRExpr* e );
473
474/*---------------------------------------------------------*/
475/*--- ISEL: Misc helpers                                ---*/
476/*---------------------------------------------------------*/
477
478/* Make an int reg-reg move. */
479
480static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
481{
482   vassert(hregClass(r_dst) == hregClass(r_src));
483   vassert(hregClass(r_src) ==  HRcInt32 ||
484           hregClass(r_src) ==  HRcInt64);
485   return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
486}
487
488/* Advance/retreat %r1 by n. */
489
490static void add_to_sp ( ISelEnv* env, UInt n )
491{
492   HReg sp = StackFramePtr(env->mode64);
493   vassert(n <= 1024 && (n%16) == 0);
494   addInstr(env, PPCInstr_Alu( Palu_ADD, sp, sp,
495                               PPCRH_Imm(True,toUShort(n)) ));
496}
497
498static void sub_from_sp ( ISelEnv* env, UInt n )
499{
500   HReg sp = StackFramePtr(env->mode64);
501   vassert(n <= 1024 && (n%16) == 0);
502   addInstr(env, PPCInstr_Alu( Palu_SUB, sp, sp,
503                               PPCRH_Imm(True,toUShort(n)) ));
504}
505
506/*
507  returns a quadword aligned address on the stack
508   - copies SP, adds 16bytes, aligns to quadword.
509  use sub_from_sp(32) before calling this,
510  as expects to have 32 bytes to play with.
511*/
512static HReg get_sp_aligned16 ( ISelEnv* env )
513{
514   HReg       r = newVRegI(env);
515   HReg align16 = newVRegI(env);
516   addInstr(env, mk_iMOVds_RR(r, StackFramePtr(env->mode64)));
517   // add 16
518   addInstr(env, PPCInstr_Alu( Palu_ADD, r, r,
519                               PPCRH_Imm(True,toUShort(16)) ));
520   // mask to quadword
521   addInstr(env,
522            PPCInstr_LI(align16, 0xFFFFFFFFFFFFFFF0ULL, env->mode64));
523   addInstr(env, PPCInstr_Alu(Palu_AND, r,r, PPCRH_Reg(align16)));
524   return r;
525}
526
527
528
529/* Load 2*I32 regs to fp reg */
530static HReg mk_LoadRR32toFPR ( ISelEnv* env,
531                               HReg r_srcHi, HReg r_srcLo )
532{
533   HReg fr_dst = newVRegF(env);
534   PPCAMode *am_addr0, *am_addr1;
535
536   vassert(!env->mode64);
537   vassert(hregClass(r_srcHi) == HRcInt32);
538   vassert(hregClass(r_srcLo) == HRcInt32);
539
540   sub_from_sp( env, 16 );        // Move SP down 16 bytes
541   am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
542   am_addr1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
543
544   // store hi,lo as Ity_I32's
545   addInstr(env, PPCInstr_Store( 4, am_addr0, r_srcHi, env->mode64 ));
546   addInstr(env, PPCInstr_Store( 4, am_addr1, r_srcLo, env->mode64 ));
547
548   // load as float
549   addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
550
551   add_to_sp( env, 16 );          // Reset SP
552   return fr_dst;
553}
554
555/* Load I64 reg to fp reg */
556static HReg mk_LoadR64toFPR ( ISelEnv* env, HReg r_src )
557{
558   HReg fr_dst = newVRegF(env);
559   PPCAMode *am_addr0;
560
561   vassert(env->mode64);
562   vassert(hregClass(r_src) == HRcInt64);
563
564   sub_from_sp( env, 16 );        // Move SP down 16 bytes
565   am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
566
567   // store as Ity_I64
568   addInstr(env, PPCInstr_Store( 8, am_addr0, r_src, env->mode64 ));
569
570   // load as float
571   addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
572
573   add_to_sp( env, 16 );          // Reset SP
574   return fr_dst;
575}
576
577
578/* Given an amode, return one which references 4 bytes further
579   along. */
580
581static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
582{
583   PPCAMode* am4 = dopyPPCAMode( am );
584   if (am4->tag == Pam_IR
585       && am4->Pam.IR.index + 4 <= 32767) {
586      am4->Pam.IR.index += 4;
587   } else {
588      vpanic("advance4(ppc,host)");
589   }
590   return am4;
591}
592
593
594/* Given a guest-state array descriptor, an index expression and a
595   bias, generate a PPCAMode pointing at the relevant piece of
596   guest state.  */
597static
598PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
599                                IRExpr* off, Int bias )
600{
601   HReg rtmp, roff;
602   Int  elemSz = sizeofIRType(descr->elemTy);
603   Int  nElems = descr->nElems;
604   Int  shift  = 0;
605
606   /* Throw out any cases we don't need.  In theory there might be a
607      day where we need to handle others, but not today. */
608
609   if (nElems != 16 && nElems != 32)
610      vpanic("genGuestArrayOffset(ppc host)(1)");
611
612   switch (elemSz) {
613      case 4:  shift = 2; break;
614      case 8:  shift = 3; break;
615      default: vpanic("genGuestArrayOffset(ppc host)(2)");
616   }
617
618   if (bias < -100 || bias > 100) /* somewhat arbitrarily */
619      vpanic("genGuestArrayOffset(ppc host)(3)");
620   if (descr->base < 0 || descr->base > 5000) /* somewhat arbitrarily */
621      vpanic("genGuestArrayOffset(ppc host)(4)");
622
623   /* Compute off into a reg, %off.  Then return:
624
625         addi %tmp, %off, bias (if bias != 0)
626         andi %tmp, nElems-1
627         sldi %tmp, shift
628         addi %tmp, %tmp, base
629         ... Baseblockptr + %tmp ...
630   */
631   roff = iselWordExpr_R(env, off);
632   rtmp = newVRegI(env);
633   addInstr(env, PPCInstr_Alu(
634                    Palu_ADD,
635                    rtmp, roff,
636                    PPCRH_Imm(True/*signed*/, toUShort(bias))));
637   addInstr(env, PPCInstr_Alu(
638                    Palu_AND,
639                    rtmp, rtmp,
640                    PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
641   addInstr(env, PPCInstr_Shft(
642                    Pshft_SHL,
643                    env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
644                    rtmp, rtmp,
645                    PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
646   addInstr(env, PPCInstr_Alu(
647                    Palu_ADD,
648                    rtmp, rtmp,
649                    PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
650   return
651      PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
652}
653
654
655/*---------------------------------------------------------*/
656/*--- ISEL: Function call helpers                       ---*/
657/*---------------------------------------------------------*/
658
659/* Used only in doHelperCall.  See big comment in doHelperCall re
660   handling of register-parameter args.  This function figures out
661   whether evaluation of an expression might require use of a fixed
662   register.  If in doubt return True (safe but suboptimal).
663*/
664static
665Bool mightRequireFixedRegs ( IRExpr* e )
666{
667   switch (e->tag) {
668   case Iex_RdTmp: case Iex_Const: case Iex_Get:
669      return False;
670   default:
671      return True;
672   }
673}
674
675
676/* Do a complete function call.  |guard| is a Ity_Bit expression
677   indicating whether or not the call happens.  If guard==NULL, the
678   call is unconditional.  |retloc| is set to indicate where the
679   return value is after the call.  The caller (of this fn) must
680   generate code to add |stackAdjustAfterCall| to the stack pointer
681   after the call is done. */
682
683static
684void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
685                    /*OUT*/RetLoc* retloc,
686                    ISelEnv* env,
687                    IRExpr* guard,
688                    IRCallee* cee, IRType retTy, IRExpr** args )
689{
690   PPCCondCode cc;
691   HReg        argregs[PPC_N_REGPARMS];
692   HReg        tmpregs[PPC_N_REGPARMS];
693   Bool        go_fast;
694   Int         n_args, i, argreg;
695   UInt        argiregs;
696   Bool        mode64 = env->mode64;
697
698   /* Set default returns.  We'll update them later if needed. */
699   *stackAdjustAfterCall = 0;
700   *retloc               = mk_RetLoc_INVALID();
701
702   /* These are used for cross-checking that IR-level constraints on
703      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
704   UInt nVECRETs = 0;
705   UInt nBBPTRs  = 0;
706
707   /* Do we need to force use of an odd-even reg pair for 64-bit args?
708      JRS 31-07-2013: is this still relevant, now that we are not
709      generating code for 32-bit AIX ? */
710   Bool regalign_int64s
711      = (!mode64) && env->vbi->host_ppc32_regalign_int64_args;
712
713   /* Marshal args for a call and do the call.
714
715      This function only deals with a tiny set of possibilities, which
716      cover all helpers in practice.  The restrictions are that only
717      arguments in registers are supported, hence only PPC_N_REGPARMS x
718      (mode32:32 | mode64:64) integer bits in total can be passed.
719      In fact the only supported arg type is (mode32:I32 | mode64:I64).
720
721      The return type can be I{64,32,16,8} or V{128,256}.  In the
722      latter two cases, it is expected that |args| will contain the
723      special node IRExpr_VECRET(), in which case this routine
724      generates code to allocate space on the stack for the vector
725      return value.  Since we are not passing any scalars on the
726      stack, it is enough to preallocate the return space before
727      marshalling any arguments, in this case.
728
729      |args| may also contain IRExpr_BBPTR(), in which case the value
730      in the guest state pointer register is passed as the
731      corresponding argument.
732
733      Generating code which is both efficient and correct when
734      parameters are to be passed in registers is difficult, for the
735      reasons elaborated in detail in comments attached to
736      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
737      of the method described in those comments.
738
739      The problem is split into two cases: the fast scheme and the
740      slow scheme.  In the fast scheme, arguments are computed
741      directly into the target (real) registers.  This is only safe
742      when we can be sure that computation of each argument will not
743      trash any real registers set by computation of any other
744      argument.
745
746      In the slow scheme, all args are first computed into vregs, and
747      once they are all done, they are moved to the relevant real
748      regs.  This always gives correct code, but it also gives a bunch
749      of vreg-to-rreg moves which are usually redundant but are hard
750      for the register allocator to get rid of.
751
752      To decide which scheme to use, all argument expressions are
753      first examined.  If they are all so simple that it is clear they
754      will be evaluated without use of any fixed registers, use the
755      fast scheme, else use the slow scheme.  Note also that only
756      unconditional calls may use the fast scheme, since having to
757      compute a condition expression could itself trash real
758      registers.
759
760      Note this requires being able to examine an expression and
761      determine whether or not evaluation of it might use a fixed
762      register.  That requires knowledge of how the rest of this insn
763      selector works.  Currently just the following 3 are regarded as
764      safe -- hopefully they cover the majority of arguments in
765      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
766   */
767
768   /* Note that the cee->regparms field is meaningless on PPC32/64 host
769      (since there is only one calling convention) and so we always
770      ignore it. */
771
772   n_args = 0;
773   for (i = 0; args[i]; i++)
774      n_args++;
775
776   if (n_args > PPC_N_REGPARMS) {
777      vpanic("doHelperCall(PPC): cannot currently handle > 8 args");
778      // PPC_N_REGPARMS
779   }
780
781   /* This is kind of stupid .. the arrays are sized as PPC_N_REGPARMS
782      but we then assume that that value is 8. */
783   vassert(PPC_N_REGPARMS == 8);
784
785   argregs[0] = hregPPC_GPR3(mode64);
786   argregs[1] = hregPPC_GPR4(mode64);
787   argregs[2] = hregPPC_GPR5(mode64);
788   argregs[3] = hregPPC_GPR6(mode64);
789   argregs[4] = hregPPC_GPR7(mode64);
790   argregs[5] = hregPPC_GPR8(mode64);
791   argregs[6] = hregPPC_GPR9(mode64);
792   argregs[7] = hregPPC_GPR10(mode64);
793   argiregs = 0;
794
795   tmpregs[0] = tmpregs[1] = tmpregs[2] =
796   tmpregs[3] = tmpregs[4] = tmpregs[5] =
797   tmpregs[6] = tmpregs[7] = INVALID_HREG;
798
799   /* First decide which scheme (slow or fast) is to be used.  First
800      assume the fast scheme, and select slow if any contraindications
801      (wow) appear. */
802
803   go_fast = True;
804
805   /* We'll need space on the stack for the return value.  Avoid
806      possible complications with nested calls by using the slow
807      scheme. */
808   if (retTy == Ity_V128 || retTy == Ity_V256)
809      go_fast = False;
810
811   if (go_fast && guard) {
812      if (guard->tag == Iex_Const
813          && guard->Iex.Const.con->tag == Ico_U1
814          && guard->Iex.Const.con->Ico.U1 == True) {
815         /* unconditional */
816      } else {
817         /* Not manifestly unconditional -- be conservative. */
818         go_fast = False;
819      }
820   }
821
822   if (go_fast) {
823      for (i = 0; i < n_args; i++) {
824         IRExpr* arg = args[i];
825         if (UNLIKELY(arg->tag == Iex_BBPTR)) {
826            /* that's OK */
827         }
828         else if (UNLIKELY(arg->tag == Iex_VECRET)) {
829            /* This implies ill-formed IR, since if the IR was
830               well-formed, the return-type test above would have
831               filtered it out. */
832            vpanic("doHelperCall(PPC): invalid IR");
833         }
834         else if (mightRequireFixedRegs(arg)) {
835            go_fast = False;
836            break;
837         }
838      }
839   }
840
841   /* At this point the scheme to use has been established.  Generate
842      code to get the arg values into the argument rregs. */
843
844   if (go_fast) {
845
846      /* FAST SCHEME */
847      argreg = 0;
848
849      for (i = 0; i < n_args; i++) {
850         IRExpr* arg = args[i];
851         vassert(argreg < PPC_N_REGPARMS);
852
853         if (arg->tag == Iex_BBPTR) {
854            argiregs |= (1 << (argreg+3));
855            addInstr(env, mk_iMOVds_RR( argregs[argreg],
856                                        GuestStatePtr(mode64) ));
857            argreg++;
858         } else {
859            vassert(arg->tag != Iex_VECRET);
860            IRType ty = typeOfIRExpr(env->type_env, arg);
861            vassert(ty == Ity_I32 || ty == Ity_I64);
862            if (!mode64) {
863               if (ty == Ity_I32) {
864                  argiregs |= (1 << (argreg+3));
865                  addInstr(env,
866                           mk_iMOVds_RR( argregs[argreg],
867                                         iselWordExpr_R(env, arg) ));
868               } else { // Ity_I64 in 32-bit mode
869                  HReg rHi, rLo;
870                  if (regalign_int64s && (argreg%2) == 1)
871                                 // ppc32 ELF abi spec for passing LONG_LONG
872                     argreg++;   // XXX: odd argreg => even rN
873                  vassert(argreg < PPC_N_REGPARMS-1);
874                  iselInt64Expr(&rHi,&rLo, env, arg);
875                  argiregs |= (1 << (argreg+3));
876                  addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
877                  argiregs |= (1 << (argreg+3));
878                  addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
879               }
880            } else { // mode64
881               argiregs |= (1 << (argreg+3));
882               addInstr(env, mk_iMOVds_RR( argregs[argreg],
883                                           iselWordExpr_R(env, arg) ));
884            }
885            argreg++;
886         } /* if (arg == IRExprP__BBPR) */
887      }
888
889      /* Fast scheme only applies for unconditional calls.  Hence: */
890      cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
891
892   } else {
893
894      /* SLOW SCHEME; move via temporaries */
895      argreg = 0;
896
897      /* If we have a vector return type, allocate a place for it on
898         the stack and record its address.  Rather than figure out the
899         complexities of PPC{32,64} ELF ABI stack frame layout, simply
900         drop the SP by 1024 and allocate the return point in the
901         middle.  I think this should comfortably clear any ABI
902         mandated register save areas.  Note that it doesn't maintain
903         the backchain as it should, since we're not doing st{d,w}u to
904         adjust the SP, but .. that doesn't seem to be a big deal.
905         Since we're not expecting to have to unwind out of here. */
906      HReg r_vecRetAddr = INVALID_HREG;
907      if (retTy == Ity_V128) {
908         r_vecRetAddr = newVRegI(env);
909         sub_from_sp(env, 512);
910         addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
911         sub_from_sp(env, 512);
912      }
913      else if (retTy == Ity_V256) {
914         vassert(0); //ATC
915         r_vecRetAddr = newVRegI(env);
916         sub_from_sp(env, 512);
917         addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
918         sub_from_sp(env, 512);
919      }
920
921      vassert(n_args >= 0 && n_args <= 8);
922      for (i = 0; i < n_args; i++) {
923         IRExpr* arg = args[i];
924         vassert(argreg < PPC_N_REGPARMS);
925         if (UNLIKELY(arg->tag == Iex_BBPTR)) {
926            tmpregs[argreg] = newVRegI(env);
927            addInstr(env, mk_iMOVds_RR( tmpregs[argreg],
928                                        GuestStatePtr(mode64) ));
929            nBBPTRs++;
930         }
931         else if (UNLIKELY(arg->tag == Iex_VECRET)) {
932            /* We stashed the address of the return slot earlier, so just
933               retrieve it now. */
934            vassert(!hregIsInvalid(r_vecRetAddr));
935            tmpregs[i] = r_vecRetAddr;
936            nVECRETs++;
937         }
938         else {
939            IRType ty = typeOfIRExpr(env->type_env, arg);
940            vassert(ty == Ity_I32 || ty == Ity_I64);
941            if (!mode64) {
942               if (ty == Ity_I32) {
943                  tmpregs[argreg] = iselWordExpr_R(env, arg);
944               } else { // Ity_I64 in 32-bit mode
945                  HReg rHi, rLo;
946                  if (regalign_int64s && (argreg%2) == 1)
947                                // ppc32 ELF abi spec for passing LONG_LONG
948                     argreg++;  // XXX: odd argreg => even rN
949                  vassert(argreg < PPC_N_REGPARMS-1);
950                  iselInt64Expr(&rHi,&rLo, env, arg);
951                  tmpregs[argreg++] = rHi;
952                  tmpregs[argreg]   = rLo;
953               }
954            } else { // mode64
955               tmpregs[argreg] = iselWordExpr_R(env, arg);
956            }
957         }
958         argreg++;
959      }
960
961      /* Now we can compute the condition.  We can't do it earlier
962         because the argument computations could trash the condition
963         codes.  Be a bit clever to handle the common case where the
964         guard is 1:Bit. */
965      cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
966      if (guard) {
967         if (guard->tag == Iex_Const
968             && guard->Iex.Const.con->tag == Ico_U1
969             && guard->Iex.Const.con->Ico.U1 == True) {
970            /* unconditional -- do nothing */
971         } else {
972            cc = iselCondCode( env, guard );
973         }
974      }
975
976      /* Move the args to their final destinations. */
977      for (i = 0; i < argreg; i++) {
978         if (hregIsInvalid(tmpregs[i]))  // Skip invalid regs
979            continue;
980         /* None of these insns, including any spill code that might
981            be generated, may alter the condition codes. */
982         argiregs |= (1 << (i+3));
983         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
984      }
985
986   }
987
988   /* Do final checks, set the return values, and generate the call
989      instruction proper. */
990   if (retTy == Ity_V128 || retTy == Ity_V256) {
991      vassert(nVECRETs == 1);
992   } else {
993      vassert(nVECRETs == 0);
994   }
995
996   vassert(nBBPTRs == 0 || nBBPTRs == 1);
997
998   vassert(*stackAdjustAfterCall == 0);
999   vassert(is_RetLoc_INVALID(*retloc));
1000   switch (retTy) {
1001      case Ity_INVALID:
1002         /* Function doesn't return a value. */
1003         *retloc = mk_RetLoc_simple(RLPri_None);
1004         break;
1005      case Ity_I64:
1006         *retloc = mk_RetLoc_simple(mode64 ? RLPri_Int : RLPri_2Int);
1007         break;
1008      case Ity_I32: case Ity_I16: case Ity_I8:
1009         *retloc = mk_RetLoc_simple(RLPri_Int);
1010         break;
1011      case Ity_V128:
1012         /* Result is 512 bytes up the stack, and after it has been
1013            retrieved, adjust SP upwards by 1024. */
1014         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 512);
1015         *stackAdjustAfterCall = 1024;
1016         break;
1017      case Ity_V256:
1018         vassert(0); // ATC
1019         /* Ditto */
1020         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 512);
1021         *stackAdjustAfterCall = 1024;
1022         break;
1023      default:
1024         /* IR can denote other possible return types, but we don't
1025            handle those here. */
1026         vassert(0);
1027   }
1028
1029   /* Finally, generate the call itself.  This needs the *retloc value
1030      set in the switch above, which is why it's at the end. */
1031
1032   ULong target = mode64 ? Ptr_to_ULong(cee->addr)
1033                         : toUInt(Ptr_to_ULong(cee->addr));
1034   addInstr(env, PPCInstr_Call( cc, (Addr64)target, argiregs, *retloc ));
1035}
1036
1037
1038/*---------------------------------------------------------*/
1039/*--- ISEL: FP rounding mode helpers                    ---*/
1040/*---------------------------------------------------------*/
1041
1042///* Set FPU's rounding mode to the default */
1043//static
1044//void set_FPU_rounding_default ( ISelEnv* env )
1045//{
1046//   HReg fr_src = newVRegF(env);
1047//   HReg r_src  = newVRegI(env);
1048//
1049//   /* Default rounding mode = 0x0
1050//      Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
1051//       - so we can set the whole register at once (faster)
1052//      note: upper 32 bits ignored by FpLdFPSCR
1053//   */
1054//   addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
1055//   if (env->mode64) {
1056//      fr_src = mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
1057//   } else {
1058//      fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1059//   }
1060//   addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
1061//}
1062
1063/* Convert IR rounding mode to PPC encoding */
1064static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
1065{
1066   /*
1067   rounding mode                     | PPC  |  IR
1068   -----------------------------------------------
1069   to nearest, ties to even          | 000  | 000
1070   to zero                           | 001  | 011
1071   to +infinity                      | 010  | 010
1072   to -infinity                      | 011  | 001
1073   +++++ Below are the extended rounding modes for decimal floating point +++++
1074   to nearest, ties away from 0      | 100  | 100
1075   to nearest, ties toward 0         | 101  | 111
1076   to away from 0                    | 110  | 110
1077   to prepare for shorter precision  | 111  | 101
1078   */
1079   HReg r_rmPPC = newVRegI(env);
1080   HReg r_tmp1  = newVRegI(env);
1081   HReg r_tmp2  = newVRegI(env);
1082
1083   vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
1084
1085   // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
1086   //
1087   // slwi  tmp1,    r_rmIR, 1
1088   // xor   tmp1,    r_rmIR, tmp1
1089   // andi  r_rmPPC, tmp1, 3
1090
1091   addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1092                               r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
1093
1094   addInstr( env, PPCInstr_Alu( Palu_AND,
1095                                r_tmp2, r_tmp1, PPCRH_Imm( False, 3 ) ) );
1096
1097   addInstr( env, PPCInstr_Alu( Palu_XOR,
1098                                r_rmPPC, r_rmIR, PPCRH_Reg( r_tmp2 ) ) );
1099
1100   return r_rmPPC;
1101}
1102
1103
1104/* Set the FPU's rounding mode: 'mode' is an I32-typed expression
1105   denoting a value in the range 0 .. 7, indicating a round mode
1106   encoded as per type IRRoundingMode.  Set the PPC FPSCR to have the
1107   same rounding.  When the dfp_rm arg is True, set the decimal
1108   floating point rounding mode bits (29:31); otherwise, set the
1109   binary floating point rounding mode bits (62:63).
1110
1111   For speed & simplicity, we're setting the *entire* FPSCR here.
1112
1113   Setting the rounding mode is expensive.  So this function tries to
1114   avoid repeatedly setting the rounding mode to the same thing by
1115   first comparing 'mode' to the 'mode' tree supplied in the previous
1116   call to this function, if any.  (The previous value is stored in
1117   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
1118   env->previous_rm is also just 't', then the setting is skipped.
1119
1120   This is safe because of the SSA property of IR: an IR temporary can
1121   only be defined once and so will have the same value regardless of
1122   where it appears in the block.  Cool stuff, SSA.
1123
1124   A safety condition: all attempts to set the RM must be aware of
1125   this mechanism - by being routed through the functions here.
1126
1127   Of course this only helps if blocks where the RM is set more than
1128   once and it is set to the same value each time, *and* that value is
1129   held in the same IR temporary each time.  In order to assure the
1130   latter as much as possible, the IR optimiser takes care to do CSE
1131   on any block with any sign of floating point activity.
1132*/
1133static
1134void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm )
1135{
1136   HReg fr_src = newVRegF(env);
1137   HReg r_src;
1138
1139   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
1140
1141   /* Do we need to do anything? */
1142   if (env->previous_rm
1143       && env->previous_rm->tag == Iex_RdTmp
1144       && mode->tag == Iex_RdTmp
1145       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
1146      /* no - setting it to what it was before.  */
1147      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
1148      return;
1149   }
1150
1151   /* No luck - we better set it, and remember what we set it to. */
1152   env->previous_rm = mode;
1153
1154   /* Only supporting the rounding-mode bits - the rest of FPSCR is
1155      0x0 - so we can set the whole register at once (faster). */
1156
1157   // Resolve rounding mode and convert to PPC representation
1158   r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode) );
1159
1160   // gpr -> fpr
1161   if (env->mode64) {
1162      if (dfp_rm) {
1163         HReg r_tmp1 = newVRegI( env );
1164         addInstr( env,
1165                   PPCInstr_Shft( Pshft_SHL, False/*64bit shift*/,
1166                                  r_tmp1, r_src, PPCRH_Imm( False, 32 ) ) );
1167         fr_src = mk_LoadR64toFPR( env, r_tmp1 );
1168      } else {
1169         fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1170      }
1171   } else {
1172      if (dfp_rm) {
1173         HReg r_zero = newVRegI( env );
1174         addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
1175         fr_src = mk_LoadRR32toFPR( env, r_src, r_zero );
1176      } else {
1177         fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1178      }
1179   }
1180
1181   // Move to FPSCR
1182   addInstr(env, PPCInstr_FpLdFPSCR( fr_src, dfp_rm ));
1183}
1184
1185static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
1186{
1187   _set_FPU_rounding_mode(env, mode, False);
1188}
1189
1190static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
1191{
1192   _set_FPU_rounding_mode(env, mode, True);
1193}
1194
1195
1196/*---------------------------------------------------------*/
1197/*--- ISEL: vector helpers                              ---*/
1198/*---------------------------------------------------------*/
1199
1200/* Generate all-zeroes into a new vector register.
1201*/
1202static HReg generate_zeroes_V128 ( ISelEnv* env )
1203{
1204   HReg dst = newVRegV(env);
1205   addInstr(env, PPCInstr_AvBinary(Pav_XOR, dst, dst, dst));
1206   return dst;
1207}
1208
1209/* Generate all-ones into a new vector register.
1210*/
1211static HReg generate_ones_V128 ( ISelEnv* env )
1212{
1213   HReg dst = newVRegV(env);
1214   PPCVI5s * src = PPCVI5s_Imm(-1);
1215   addInstr(env, PPCInstr_AvSplat(8, dst, src));
1216   return dst;
1217}
1218
1219
1220/*
1221  Generates code for AvSplat
1222  - takes in IRExpr* of type 8|16|32
1223    returns vector reg of duplicated lanes of input
1224  - uses AvSplat(imm) for imms up to simm6.
1225    otherwise must use store reg & load vector
1226*/
1227static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
1228{
1229   HReg   r_src;
1230   HReg   dst = newVRegV(env);
1231   PPCRI* ri  = iselWordExpr_RI(env, e);
1232   IRType ty  = typeOfIRExpr(env->type_env,e);
1233   UInt   sz  = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
1234   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1235
1236   /* special case: immediate */
1237   if (ri->tag == Pri_Imm) {
1238      Int simm32 = (Int)ri->Pri.Imm;
1239
1240      /* figure out if it's do-able with imm splats. */
1241      if (simm32 >= -32 && simm32 <= 31) {
1242         Char simm6 = (Char)simm32;
1243         if (simm6 > 15) {           /* 16:31 inclusive */
1244            HReg v1 = newVRegV(env);
1245            HReg v2 = newVRegV(env);
1246            addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1247            addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6-16)));
1248            addInstr(env,
1249               (sz== 8) ? PPCInstr_AvBin8x16(Pav_SUBU, dst, v2, v1) :
1250               (sz==16) ? PPCInstr_AvBin16x8(Pav_SUBU, dst, v2, v1)
1251                        : PPCInstr_AvBin32x4(Pav_SUBU, dst, v2, v1) );
1252            return dst;
1253         }
1254         if (simm6 < -16) {          /* -32:-17 inclusive */
1255            HReg v1 = newVRegV(env);
1256            HReg v2 = newVRegV(env);
1257            addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1258            addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6+16)));
1259            addInstr(env,
1260               (sz== 8) ? PPCInstr_AvBin8x16(Pav_ADDU, dst, v2, v1) :
1261               (sz==16) ? PPCInstr_AvBin16x8(Pav_ADDU, dst, v2, v1)
1262                        : PPCInstr_AvBin32x4(Pav_ADDU, dst, v2, v1) );
1263            return dst;
1264         }
1265         /* simplest form:              -16:15 inclusive */
1266         addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Imm(simm6)));
1267         return dst;
1268      }
1269
1270      /* no luck; use the Slow way. */
1271      r_src = newVRegI(env);
1272      addInstr(env, PPCInstr_LI(r_src, (Long)simm32, env->mode64));
1273   }
1274   else {
1275      r_src = ri->Pri.Reg;
1276   }
1277
1278   /* default case: store r_src in lowest lane of 16-aligned mem,
1279      load vector, splat lowest lane to dst */
1280   {
1281      /* CAB: Maybe faster to store r_src multiple times (sz dependent),
1282              and simply load the vector? */
1283      HReg r_aligned16;
1284      HReg v_src = newVRegV(env);
1285      PPCAMode *am_off12;
1286
1287      sub_from_sp( env, 32 );     // Move SP down
1288      /* Get a 16-aligned address within our stack space */
1289      r_aligned16 = get_sp_aligned16( env );
1290      am_off12 = PPCAMode_IR( 12, r_aligned16 );
1291
1292      /* Store r_src in low word of 16-aligned mem */
1293      addInstr(env, PPCInstr_Store( 4, am_off12, r_src, env->mode64 ));
1294
1295      /* Load src to vector[low lane] */
1296      addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, v_src, am_off12 ) );
1297      add_to_sp( env, 32 );       // Reset SP
1298
1299      /* Finally, splat v_src[low_lane] to dst */
1300      addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Reg(v_src)));
1301      return dst;
1302   }
1303}
1304
1305
1306/* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
1307static HReg isNan ( ISelEnv* env, HReg vSrc )
1308{
1309   HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
1310
1311   vassert(hregClass(vSrc) == HRcVec128);
1312
1313   zeros   = mk_AvDuplicateRI(env, mkU32(0));
1314   msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000));
1315   msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF));
1316   expt    = newVRegV(env);
1317   mnts    = newVRegV(env);
1318   vIsNan  = newVRegV(env);
1319
1320   /* 32bit float => sign(1) | exponent(8) | mantissa(23)
1321      nan => exponent all ones, mantissa > 0 */
1322
1323   addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
1324   addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
1325   addInstr(env, PPCInstr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
1326   addInstr(env, PPCInstr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
1327   addInstr(env, PPCInstr_AvBinary(Pav_AND, vIsNan, expt, mnts));
1328   return vIsNan;
1329}
1330
1331
1332/*---------------------------------------------------------*/
1333/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
1334/*---------------------------------------------------------*/
1335
1336/* Select insns for an integer-typed expression, and add them to the
1337   code list.  Return a reg holding the result.  This reg will be a
1338   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
1339   want to modify it, ask for a new vreg, copy it in there, and modify
1340   the copy.  The register allocator will do its best to map both
1341   vregs to the same real register, so the copies will often disappear
1342   later in the game.
1343
1344   This should handle expressions of 64, 32, 16 and 8-bit type.
1345   All results are returned in a (mode64 ? 64bit : 32bit) register.
1346   For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits
1347   are arbitrary, so you should mask or sign extend partial values
1348   if necessary.
1349*/
1350
1351static HReg iselWordExpr_R ( ISelEnv* env, IRExpr* e )
1352{
1353   HReg r = iselWordExpr_R_wrk(env, e);
1354   /* sanity checks ... */
1355#  if 0
1356   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1357#  endif
1358
1359   vassert(hregClass(r) == HRcGPR(env->mode64));
1360   vassert(hregIsVirtual(r));
1361   return r;
1362}
1363
1364/* DO NOT CALL THIS DIRECTLY ! */
1365static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1366{
1367   Bool mode64 = env->mode64;
1368   MatchInfo mi;
1369   DECLARE_PATTERN(p_32to1_then_1Uto8);
1370
1371   IRType ty = typeOfIRExpr(env->type_env,e);
1372   vassert(ty == Ity_I8 || ty == Ity_I16 ||
1373           ty == Ity_I32 || ((ty == Ity_I64) && mode64));
1374
1375   switch (e->tag) {
1376
1377   /* --------- TEMP --------- */
1378   case Iex_RdTmp:
1379      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1380
1381   /* --------- LOAD --------- */
1382   case Iex_Load: {
1383      HReg      r_dst;
1384      PPCAMode* am_addr;
1385      if (e->Iex.Load.end != Iend_BE)
1386         goto irreducible;
1387      r_dst   = newVRegI(env);
1388      am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/ );
1389      addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
1390                                   r_dst, am_addr, mode64 ));
1391      return r_dst;
1392      /*NOTREACHED*/
1393   }
1394
1395   /* --------- BINARY OP --------- */
1396   case Iex_Binop: {
1397      PPCAluOp  aluOp;
1398      PPCShftOp shftOp;
1399
1400      /* Is it an addition or logical style op? */
1401      switch (e->Iex.Binop.op) {
1402      case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
1403         aluOp = Palu_ADD; break;
1404      case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
1405         aluOp = Palu_SUB; break;
1406      case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
1407         aluOp = Palu_AND; break;
1408      case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
1409         aluOp = Palu_OR; break;
1410      case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
1411         aluOp = Palu_XOR; break;
1412      default:
1413         aluOp = Palu_INVALID; break;
1414      }
1415      /* For commutative ops we assume any literal
1416         values are on the second operand. */
1417      if (aluOp != Palu_INVALID) {
1418         HReg   r_dst   = newVRegI(env);
1419         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
1420         PPCRH* ri_srcR = NULL;
1421         /* get right arg into an RH, in the appropriate way */
1422         switch (aluOp) {
1423         case Palu_ADD: case Palu_SUB:
1424            ri_srcR = iselWordExpr_RH(env, True/*signed*/,
1425                                      e->Iex.Binop.arg2);
1426            break;
1427         case Palu_AND: case Palu_OR: case Palu_XOR:
1428            ri_srcR = iselWordExpr_RH(env, False/*signed*/,
1429                                      e->Iex.Binop.arg2);
1430            break;
1431         default:
1432            vpanic("iselWordExpr_R_wrk-aluOp-arg2");
1433         }
1434         addInstr(env, PPCInstr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
1435         return r_dst;
1436      }
1437
1438      /* a shift? */
1439      switch (e->Iex.Binop.op) {
1440      case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
1441         shftOp = Pshft_SHL; break;
1442      case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
1443         shftOp = Pshft_SHR; break;
1444      case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
1445         shftOp = Pshft_SAR; break;
1446      default:
1447         shftOp = Pshft_INVALID; break;
1448      }
1449      /* we assume any literal values are on the second operand. */
1450      if (shftOp != Pshft_INVALID) {
1451         HReg   r_dst   = newVRegI(env);
1452         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
1453         PPCRH* ri_srcR = NULL;
1454         /* get right arg into an RH, in the appropriate way */
1455         switch (shftOp) {
1456         case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
1457            if (!mode64)
1458               ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2);
1459            else
1460               ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2);
1461            break;
1462         default:
1463            vpanic("iselIntExpr_R_wrk-shftOp-arg2");
1464         }
1465         /* widen the left arg if needed */
1466         if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
1467            if (ty == Ity_I8 || ty == Ity_I16) {
1468               PPCRH* amt = PPCRH_Imm(False,
1469                                      toUShort(ty == Ity_I8 ? 24 : 16));
1470               HReg   tmp = newVRegI(env);
1471               addInstr(env, PPCInstr_Shft(Pshft_SHL,
1472                                           True/*32bit shift*/,
1473                                           tmp, r_srcL, amt));
1474               addInstr(env, PPCInstr_Shft(shftOp,
1475                                           True/*32bit shift*/,
1476                                           tmp, tmp,    amt));
1477               r_srcL = tmp;
1478               vassert(0); /* AWAITING TEST CASE */
1479            }
1480         }
1481         /* Only 64 expressions need 64bit shifts,
1482            32bit shifts are fine for all others */
1483         if (ty == Ity_I64) {
1484            vassert(mode64);
1485            addInstr(env, PPCInstr_Shft(shftOp, False/*64bit shift*/,
1486                                        r_dst, r_srcL, ri_srcR));
1487         } else {
1488            addInstr(env, PPCInstr_Shft(shftOp, True/*32bit shift*/,
1489                                        r_dst, r_srcL, ri_srcR));
1490         }
1491         return r_dst;
1492      }
1493
1494      /* How about a div? */
1495      if (e->Iex.Binop.op == Iop_DivS32 ||
1496          e->Iex.Binop.op == Iop_DivU32 ||
1497          e->Iex.Binop.op == Iop_DivS32E ||
1498          e->Iex.Binop.op == Iop_DivU32E) {
1499         Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS32) || (e->Iex.Binop.op == Iop_DivS32E));
1500         HReg r_dst  = newVRegI(env);
1501         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
1502         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
1503         addInstr( env,
1504                      PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivU32E )
1505                                             || ( e->Iex.Binop.op == Iop_DivS32E ) ) ? True
1506                                                                                     : False,
1507                                    syned,
1508                                    True/*32bit div*/,
1509                                    r_dst,
1510                                    r_srcL,
1511                                    r_srcR ) );
1512         return r_dst;
1513      }
1514      if (e->Iex.Binop.op == Iop_DivS64 ||
1515          e->Iex.Binop.op == Iop_DivU64 || e->Iex.Binop.op == Iop_DivS64E
1516          || e->Iex.Binop.op == Iop_DivU64E ) {
1517         Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS64) ||(e->Iex.Binop.op == Iop_DivS64E));
1518         HReg r_dst  = newVRegI(env);
1519         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
1520         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
1521         vassert(mode64);
1522         addInstr( env,
1523                      PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivS64E )
1524                                             || ( e->Iex.Binop.op
1525                                                      == Iop_DivU64E ) ) ? True
1526                                                                         : False,
1527                                    syned,
1528                                    False/*64bit div*/,
1529                                    r_dst,
1530                                    r_srcL,
1531                                    r_srcR ) );
1532         return r_dst;
1533      }
1534
1535      /* No? Anyone for a mul? */
1536      if (e->Iex.Binop.op == Iop_Mul32
1537          || e->Iex.Binop.op == Iop_Mul64) {
1538         Bool syned       = False;
1539         Bool sz32        = (e->Iex.Binop.op != Iop_Mul64);
1540         HReg r_dst       = newVRegI(env);
1541         HReg r_srcL      = iselWordExpr_R(env, e->Iex.Binop.arg1);
1542         HReg r_srcR      = iselWordExpr_R(env, e->Iex.Binop.arg2);
1543         addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
1544                                     r_dst, r_srcL, r_srcR));
1545         return r_dst;
1546      }
1547
1548      /* 32 x 32 -> 64 multiply */
1549      if (mode64
1550          && (e->Iex.Binop.op == Iop_MullU32
1551              || e->Iex.Binop.op == Iop_MullS32)) {
1552         HReg tLo    = newVRegI(env);
1553         HReg tHi    = newVRegI(env);
1554         HReg r_dst  = newVRegI(env);
1555         Bool syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
1556         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
1557         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
1558         addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
1559                                     False/*lo32*/, True/*32bit mul*/,
1560                                     tLo, r_srcL, r_srcR));
1561         addInstr(env, PPCInstr_MulL(syned,
1562                                     True/*hi32*/, True/*32bit mul*/,
1563                                     tHi, r_srcL, r_srcR));
1564         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1565                                     r_dst, tHi, PPCRH_Imm(False,32)));
1566         addInstr(env, PPCInstr_Alu(Palu_OR,
1567                                    r_dst, r_dst, PPCRH_Reg(tLo)));
1568         return r_dst;
1569      }
1570
1571      /* El-mutanto 3-way compare? */
1572      if (e->Iex.Binop.op == Iop_CmpORD32S
1573          || e->Iex.Binop.op == Iop_CmpORD32U) {
1574         Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
1575         HReg   dst   = newVRegI(env);
1576         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
1577         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
1578         addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
1579                                    7/*cr*/, srcL, srcR));
1580         addInstr(env, PPCInstr_MfCR(dst));
1581         addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1582                                    PPCRH_Imm(False,7<<1)));
1583         return dst;
1584      }
1585
1586      if (e->Iex.Binop.op == Iop_CmpORD64S
1587          || e->Iex.Binop.op == Iop_CmpORD64U) {
1588         Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
1589         HReg   dst   = newVRegI(env);
1590         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
1591         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
1592         vassert(mode64);
1593         addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
1594                                    7/*cr*/, srcL, srcR));
1595         addInstr(env, PPCInstr_MfCR(dst));
1596         addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1597                                    PPCRH_Imm(False,7<<1)));
1598         return dst;
1599      }
1600
1601      if (e->Iex.Binop.op == Iop_Max32U) {
1602         HReg        r1   = iselWordExpr_R(env, e->Iex.Binop.arg1);
1603         HReg        r2   = iselWordExpr_R(env, e->Iex.Binop.arg2);
1604         HReg        rdst = newVRegI(env);
1605         PPCCondCode cc   = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
1606         addInstr(env, mk_iMOVds_RR(rdst, r1));
1607         addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
1608                                    7/*cr*/, rdst, PPCRH_Reg(r2)));
1609         addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
1610         return rdst;
1611      }
1612
1613      if (e->Iex.Binop.op == Iop_32HLto64) {
1614         HReg   r_Hi  = iselWordExpr_R(env, e->Iex.Binop.arg1);
1615         HReg   r_Lo  = iselWordExpr_R(env, e->Iex.Binop.arg2);
1616         HReg   r_Tmp = newVRegI(env);
1617         HReg   r_dst = newVRegI(env);
1618         HReg   msk   = newVRegI(env);
1619         vassert(mode64);
1620         /* r_dst = OR( r_Hi<<32, r_Lo ) */
1621         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1622                                     r_dst, r_Hi, PPCRH_Imm(False,32)));
1623         addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
1624         addInstr(env, PPCInstr_Alu( Palu_AND, r_Tmp, r_Lo,
1625                                     PPCRH_Reg(msk) ));
1626         addInstr(env, PPCInstr_Alu( Palu_OR, r_dst, r_dst,
1627                                     PPCRH_Reg(r_Tmp) ));
1628         return r_dst;
1629      }
1630
1631      if ((e->Iex.Binop.op == Iop_CmpF64) ||
1632          (e->Iex.Binop.op == Iop_CmpD64) ||
1633          (e->Iex.Binop.op == Iop_CmpD128)) {
1634         HReg fr_srcL;
1635         HReg fr_srcL_lo;
1636         HReg fr_srcR;
1637         HReg fr_srcR_lo;
1638
1639         HReg r_ccPPC   = newVRegI(env);
1640         HReg r_ccIR    = newVRegI(env);
1641         HReg r_ccIR_b0 = newVRegI(env);
1642         HReg r_ccIR_b2 = newVRegI(env);
1643         HReg r_ccIR_b6 = newVRegI(env);
1644
1645         if (e->Iex.Binop.op == Iop_CmpF64) {
1646            fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1);
1647            fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2);
1648            addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
1649
1650         } else if (e->Iex.Binop.op == Iop_CmpD64) {
1651            fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1);
1652            fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2);
1653            addInstr(env, PPCInstr_Dfp64Cmp(r_ccPPC, fr_srcL, fr_srcR));
1654
1655         } else {    //  e->Iex.Binop.op == Iop_CmpD128
1656            iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1);
1657            iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2);
1658            addInstr(env, PPCInstr_Dfp128Cmp(r_ccPPC, fr_srcL, fr_srcL_lo,
1659                                             fr_srcR, fr_srcR_lo));
1660         }
1661
1662         /* Map compare result from PPC to IR,
1663            conforming to CmpF64 definition. */
1664         /*
1665           FP cmp result | PPC | IR
1666           --------------------------
1667           UN            | 0x1 | 0x45
1668           EQ            | 0x2 | 0x40
1669           GT            | 0x4 | 0x00
1670           LT            | 0x8 | 0x01
1671         */
1672
1673         // r_ccIR_b0 = r_ccPPC[0] | r_ccPPC[3]
1674         addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1675                                     r_ccIR_b0, r_ccPPC,
1676                                     PPCRH_Imm(False,0x3)));
1677         addInstr(env, PPCInstr_Alu(Palu_OR,  r_ccIR_b0,
1678                                    r_ccPPC,   PPCRH_Reg(r_ccIR_b0)));
1679         addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b0,
1680                                    r_ccIR_b0, PPCRH_Imm(False,0x1)));
1681
1682         // r_ccIR_b2 = r_ccPPC[0]
1683         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1684                                     r_ccIR_b2, r_ccPPC,
1685                                     PPCRH_Imm(False,0x2)));
1686         addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b2,
1687                                    r_ccIR_b2, PPCRH_Imm(False,0x4)));
1688
1689         // r_ccIR_b6 = r_ccPPC[0] | r_ccPPC[1]
1690         addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1691                                     r_ccIR_b6, r_ccPPC,
1692                                     PPCRH_Imm(False,0x1)));
1693         addInstr(env, PPCInstr_Alu(Palu_OR,  r_ccIR_b6,
1694                                    r_ccPPC, PPCRH_Reg(r_ccIR_b6)));
1695         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1696                                     r_ccIR_b6, r_ccIR_b6,
1697                                     PPCRH_Imm(False,0x6)));
1698         addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b6,
1699                                    r_ccIR_b6, PPCRH_Imm(False,0x40)));
1700
1701         // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
1702         addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1703                                    r_ccIR_b0, PPCRH_Reg(r_ccIR_b2)));
1704         addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1705                                    r_ccIR,    PPCRH_Reg(r_ccIR_b6)));
1706         return r_ccIR;
1707      }
1708
1709      if ( e->Iex.Binop.op == Iop_F64toI32S ||
1710               e->Iex.Binop.op == Iop_F64toI32U ) {
1711         /* This works in both mode64 and mode32. */
1712         HReg      r1      = StackFramePtr(env->mode64);
1713         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1714         HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2);
1715         HReg      ftmp    = newVRegF(env);
1716         HReg      idst    = newVRegI(env);
1717
1718         /* Set host rounding mode */
1719         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1720
1721         sub_from_sp( env, 16 );
1722         addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
1723                                       e->Iex.Binop.op == Iop_F64toI32S ? True/*syned*/
1724                                                                     : False,
1725                                       True/*flt64*/,
1726                                       ftmp, fsrc));
1727         addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
1728         addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
1729
1730         /* in 64-bit mode we need to sign-widen idst. */
1731         if (mode64)
1732            addInstr(env, PPCInstr_Unary(Pun_EXTSW, idst, idst));
1733
1734         add_to_sp( env, 16 );
1735
1736         ///* Restore default FPU rounding. */
1737         //set_FPU_rounding_default( env );
1738         return idst;
1739      }
1740
1741      if (e->Iex.Binop.op == Iop_F64toI64S || e->Iex.Binop.op == Iop_F64toI64U ) {
1742         if (mode64) {
1743            HReg      r1      = StackFramePtr(env->mode64);
1744            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1745            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2);
1746            HReg      idst    = newVRegI(env);
1747            HReg      ftmp    = newVRegF(env);
1748
1749            /* Set host rounding mode */
1750            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1751
1752            sub_from_sp( env, 16 );
1753            addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
1754                                          ( e->Iex.Binop.op == Iop_F64toI64S ) ? True
1755                                                                            : False,
1756                                          True, ftmp, fsrc));
1757            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1758            addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1759            add_to_sp( env, 16 );
1760
1761            ///* Restore default FPU rounding. */
1762            //set_FPU_rounding_default( env );
1763            return idst;
1764         }
1765      }
1766
1767      if (e->Iex.Binop.op == Iop_D64toI64S ) {
1768         HReg      r1      = StackFramePtr(env->mode64);
1769         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1770         HReg      fr_src  = iselDfp64Expr(env, e->Iex.Binop.arg2);
1771         HReg      idst    = newVRegI(env);
1772         HReg      ftmp    = newVRegF(env);
1773
1774         /* Set host rounding mode */
1775         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
1776         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, ftmp, fr_src));
1777         sub_from_sp( env, 16 );
1778         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1779         addInstr(env, PPCInstr_Load(8, idst, zero_r1, mode64));
1780
1781         add_to_sp( env, 16 );
1782
1783         ///* Restore default FPU rounding. */
1784         //set_FPU_rounding_default( env );
1785         return idst;
1786      }
1787
1788      if (e->Iex.Binop.op == Iop_D128toI64S ) {
1789         PPCFpOp fpop = Pfp_DCTFIXQ;
1790         HReg r_srcHi = newVRegF(env);
1791         HReg r_srcLo = newVRegF(env);
1792         HReg idst    = newVRegI(env);
1793         HReg ftmp    = newVRegF(env);
1794         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
1795
1796         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
1797         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
1798         addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
1799
1800         // put the D64 result into an integer register
1801         sub_from_sp( env, 16 );
1802         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1803         addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1804         add_to_sp( env, 16 );
1805         return idst;
1806      }
1807      break;
1808   }
1809
1810   /* --------- UNARY OP --------- */
1811   case Iex_Unop: {
1812      IROp op_unop = e->Iex.Unop.op;
1813
1814      /* 1Uto8(32to1(expr32)) */
1815      DEFINE_PATTERN(p_32to1_then_1Uto8,
1816                     unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1817      if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1818         IRExpr* expr32 = mi.bindee[0];
1819         HReg r_dst = newVRegI(env);
1820         HReg r_src = iselWordExpr_R(env, expr32);
1821         addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
1822                                    r_src, PPCRH_Imm(False,1)));
1823         return r_dst;
1824      }
1825
1826      /* 16Uto32(LDbe:I16(expr32)) */
1827      {
1828         DECLARE_PATTERN(p_LDbe16_then_16Uto32);
1829         DEFINE_PATTERN(p_LDbe16_then_16Uto32,
1830                        unop(Iop_16Uto32,
1831                             IRExpr_Load(Iend_BE,Ity_I16,bind(0))) );
1832         if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
1833            HReg r_dst = newVRegI(env);
1834            PPCAMode* amode
1835               = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/ );
1836            addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
1837            return r_dst;
1838         }
1839      }
1840
1841      switch (op_unop) {
1842      case Iop_8Uto16:
1843      case Iop_8Uto32:
1844      case Iop_8Uto64:
1845      case Iop_16Uto32:
1846      case Iop_16Uto64: {
1847         HReg   r_dst = newVRegI(env);
1848         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1849         UShort mask  = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
1850                                 op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
1851         addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
1852                                    PPCRH_Imm(False,mask)));
1853         return r_dst;
1854      }
1855      case Iop_32Uto64: {
1856         HReg r_dst = newVRegI(env);
1857         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1858         vassert(mode64);
1859         addInstr(env,
1860                  PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1861                                r_dst, r_src, PPCRH_Imm(False,32)));
1862         addInstr(env,
1863                  PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1864                                r_dst, r_dst, PPCRH_Imm(False,32)));
1865         return r_dst;
1866      }
1867      case Iop_8Sto16:
1868      case Iop_8Sto32:
1869      case Iop_16Sto32: {
1870         HReg   r_dst = newVRegI(env);
1871         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1872         UShort amt   = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
1873         addInstr(env,
1874                  PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1875                                r_dst, r_src, PPCRH_Imm(False,amt)));
1876         addInstr(env,
1877                  PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1878                                r_dst, r_dst, PPCRH_Imm(False,amt)));
1879         return r_dst;
1880      }
1881      case Iop_8Sto64:
1882      case Iop_16Sto64: {
1883         HReg   r_dst = newVRegI(env);
1884         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1885         UShort amt   = toUShort(op_unop==Iop_8Sto64  ? 56 : 48);
1886         vassert(mode64);
1887         addInstr(env,
1888                  PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1889                                r_dst, r_src, PPCRH_Imm(False,amt)));
1890         addInstr(env,
1891                  PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
1892                                r_dst, r_dst, PPCRH_Imm(False,amt)));
1893         return r_dst;
1894      }
1895      case Iop_32Sto64: {
1896         HReg   r_dst = newVRegI(env);
1897         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1898	 vassert(mode64);
1899         /* According to the IBM docs, in 64 bit mode, srawi r,r,0
1900            sign extends the lower 32 bits into the upper 32 bits. */
1901         addInstr(env,
1902                  PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1903                                r_dst, r_src, PPCRH_Imm(False,0)));
1904         return r_dst;
1905      }
1906      case Iop_Not8:
1907      case Iop_Not16:
1908      case Iop_Not32:
1909      case Iop_Not64: {
1910         if (op_unop == Iop_Not64) vassert(mode64);
1911         HReg r_dst = newVRegI(env);
1912         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1913         addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
1914         return r_dst;
1915      }
1916      case Iop_64HIto32: {
1917         if (!mode64) {
1918            HReg rHi, rLo;
1919            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1920            return rHi; /* and abandon rLo .. poor wee thing :-) */
1921         } else {
1922            HReg   r_dst = newVRegI(env);
1923            HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1924            addInstr(env,
1925                     PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1926                                   r_dst, r_src, PPCRH_Imm(False,32)));
1927            return r_dst;
1928         }
1929      }
1930      case Iop_64to32: {
1931         if (!mode64) {
1932            HReg rHi, rLo;
1933            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1934            return rLo; /* similar stupid comment to the above ... */
1935         } else {
1936            /* This is a no-op. */
1937            return iselWordExpr_R(env, e->Iex.Unop.arg);
1938         }
1939      }
1940      case Iop_64to16: {
1941         if (mode64) { /* This is a no-op. */
1942            return iselWordExpr_R(env, e->Iex.Unop.arg);
1943         }
1944         break; /* evidently not used in 32-bit mode */
1945      }
1946      case Iop_16HIto8:
1947      case Iop_32HIto16: {
1948         HReg   r_dst = newVRegI(env);
1949         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
1950         UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
1951         addInstr(env,
1952                  PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1953                                r_dst, r_src, PPCRH_Imm(False,shift)));
1954         return r_dst;
1955      }
1956      case Iop_128HIto64:
1957         if (mode64) {
1958            HReg rHi, rLo;
1959            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1960            return rHi; /* and abandon rLo .. poor wee thing :-) */
1961         }
1962         break;
1963      case Iop_128to64:
1964         if (mode64) {
1965            HReg rHi, rLo;
1966            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1967            return rLo; /* similar stupid comment to the above ... */
1968         }
1969         break;
1970      case Iop_1Uto64:
1971      case Iop_1Uto32:
1972      case Iop_1Uto8:
1973         if ((op_unop != Iop_1Uto64) || mode64) {
1974            HReg        r_dst = newVRegI(env);
1975            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg);
1976            addInstr(env, PPCInstr_Set(cond,r_dst));
1977            return r_dst;
1978         }
1979         break;
1980      case Iop_1Sto8:
1981      case Iop_1Sto16:
1982      case Iop_1Sto32: {
1983         /* could do better than this, but for now ... */
1984         HReg        r_dst = newVRegI(env);
1985         PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg);
1986         addInstr(env, PPCInstr_Set(cond,r_dst));
1987         addInstr(env,
1988                  PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1989                                r_dst, r_dst, PPCRH_Imm(False,31)));
1990         addInstr(env,
1991                  PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1992                                r_dst, r_dst, PPCRH_Imm(False,31)));
1993         return r_dst;
1994      }
1995      case Iop_1Sto64:
1996         if (mode64) {
1997            /* could do better than this, but for now ... */
1998            HReg        r_dst = newVRegI(env);
1999            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg);
2000            addInstr(env, PPCInstr_Set(cond,r_dst));
2001            addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
2002                                        r_dst, r_dst, PPCRH_Imm(False,63)));
2003            addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2004                                        r_dst, r_dst, PPCRH_Imm(False,63)));
2005            return r_dst;
2006         }
2007         break;
2008      case Iop_Clz32:
2009      case Iop_Clz64: {
2010         HReg r_src, r_dst;
2011         PPCUnaryOp op_clz = (op_unop == Iop_Clz32) ? Pun_CLZ32 :
2012                                                      Pun_CLZ64;
2013         if (op_unop == Iop_Clz64 && !mode64)
2014            goto irreducible;
2015         /* Count leading zeroes. */
2016         r_dst = newVRegI(env);
2017         r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
2018         addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
2019         return r_dst;
2020      }
2021
2022      case Iop_Left8:
2023      case Iop_Left16:
2024      case Iop_Left32:
2025      case Iop_Left64: {
2026         HReg r_src, r_dst;
2027         if (op_unop == Iop_Left64 && !mode64)
2028            goto irreducible;
2029         r_dst = newVRegI(env);
2030         r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
2031         addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2032         addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2033         return r_dst;
2034      }
2035
2036      case Iop_CmpwNEZ32: {
2037         HReg r_dst = newVRegI(env);
2038         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
2039         addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2040         addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2041         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2042                                     r_dst, r_dst, PPCRH_Imm(False, 31)));
2043         return r_dst;
2044      }
2045
2046      case Iop_CmpwNEZ64: {
2047         HReg r_dst = newVRegI(env);
2048         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
2049         if (!mode64) goto irreducible;
2050         addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2051         addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2052         addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2053                                     r_dst, r_dst, PPCRH_Imm(False, 63)));
2054         return r_dst;
2055      }
2056
2057      case Iop_V128to32: {
2058         HReg        r_aligned16;
2059         HReg        dst  = newVRegI(env);
2060         HReg        vec  = iselVecExpr(env, e->Iex.Unop.arg);
2061         PPCAMode *am_off0, *am_off12;
2062         sub_from_sp( env, 32 );     // Move SP down 32 bytes
2063
2064         // get a quadword aligned address within our stack space
2065         r_aligned16 = get_sp_aligned16( env );
2066         am_off0  = PPCAMode_IR( 0, r_aligned16 );
2067         am_off12 = PPCAMode_IR( 12,r_aligned16 );
2068
2069         // store vec, load low word to dst
2070         addInstr(env,
2071                  PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2072         addInstr(env,
2073                  PPCInstr_Load( 4, dst, am_off12, mode64 ));
2074
2075         add_to_sp( env, 32 );       // Reset SP
2076         return dst;
2077      }
2078
2079      case Iop_V128to64:
2080      case Iop_V128HIto64:
2081         if (mode64) {
2082            HReg     r_aligned16;
2083            HReg     dst = newVRegI(env);
2084            HReg     vec = iselVecExpr(env, e->Iex.Unop.arg);
2085            PPCAMode *am_off0, *am_off8;
2086            sub_from_sp( env, 32 );     // Move SP down 32 bytes
2087
2088            // get a quadword aligned address within our stack space
2089            r_aligned16 = get_sp_aligned16( env );
2090            am_off0 = PPCAMode_IR( 0, r_aligned16 );
2091            am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
2092
2093            // store vec, load low word (+8) or high (+0) to dst
2094            addInstr(env,
2095                     PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2096            addInstr(env,
2097                     PPCInstr_Load(
2098                        8, dst,
2099                        op_unop == Iop_V128HIto64 ? am_off0 : am_off8,
2100                        mode64 ));
2101
2102            add_to_sp( env, 32 );       // Reset SP
2103            return dst;
2104         }
2105         break;
2106      case Iop_16to8:
2107      case Iop_32to8:
2108      case Iop_32to16:
2109      case Iop_64to8:
2110         /* These are no-ops. */
2111         return iselWordExpr_R(env, e->Iex.Unop.arg);
2112
2113      /* ReinterpF64asI64(e) */
2114      /* Given an IEEE754 double, produce an I64 with the same bit
2115         pattern. */
2116      case Iop_ReinterpF64asI64:
2117         if (mode64) {
2118            PPCAMode *am_addr;
2119            HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg);
2120            HReg r_dst  = newVRegI(env);
2121
2122            sub_from_sp( env, 16 );     // Move SP down 16 bytes
2123            am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2124
2125            // store as F64
2126            addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2127                                           fr_src, am_addr ));
2128            // load as Ity_I64
2129            addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2130
2131            add_to_sp( env, 16 );       // Reset SP
2132            return r_dst;
2133         }
2134         break;
2135
2136      /* ReinterpF32asI32(e) */
2137      /* Given an IEEE754 float, produce an I32 with the same bit
2138         pattern. */
2139      case Iop_ReinterpF32asI32: {
2140         /* I believe this generates correct code for both 32- and
2141            64-bit hosts. */
2142         PPCAMode *am_addr;
2143         HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg);
2144         HReg r_dst  = newVRegI(env);
2145
2146         sub_from_sp( env, 16 );     // Move SP down 16 bytes
2147         am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2148
2149         // store as F32
2150         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
2151                                        fr_src, am_addr ));
2152         // load as Ity_I32
2153         addInstr(env, PPCInstr_Load( 4, r_dst, am_addr, mode64 ));
2154
2155         add_to_sp( env, 16 );       // Reset SP
2156         return r_dst;
2157      }
2158      break;
2159
2160      case Iop_ReinterpD64asI64:
2161         if (mode64) {
2162            PPCAMode *am_addr;
2163            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg);
2164            HReg r_dst  = newVRegI(env);
2165
2166            sub_from_sp( env, 16 );     // Move SP down 16 bytes
2167            am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2168
2169            // store as D64
2170            addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2171                                           fr_src, am_addr ));
2172            // load as Ity_I64
2173            addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2174            add_to_sp( env, 16 );       // Reset SP
2175            return r_dst;
2176         }
2177         break;
2178
2179      case Iop_BCDtoDPB: {
2180         /* the following is only valid in 64 bit mode */
2181         if (!mode64) break;
2182
2183         PPCCondCode cc;
2184         UInt        argiregs;
2185         HReg        argregs[1];
2186         HReg        r_dst  = newVRegI(env);
2187         Int         argreg;
2188         HWord*      fdescr;
2189
2190         argiregs = 0;
2191         argreg = 0;
2192         argregs[0] = hregPPC_GPR3(mode64);
2193
2194         argiregs |= (1 << (argreg+3));
2195         addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2196                                     iselWordExpr_R(env, e->Iex.Unop.arg) ) );
2197
2198         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2199
2200         fdescr = (HWord*)h_calc_BCDtoDPB;
2201         addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2202                                      argiregs, mk_RetLoc_simple(RLPri_Int)) );
2203
2204         addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2205         return r_dst;
2206      }
2207
2208      case Iop_DPBtoBCD: {
2209         /* the following is only valid in 64 bit mode */
2210         if (!mode64) break;
2211
2212         PPCCondCode cc;
2213         UInt        argiregs;
2214         HReg        argregs[1];
2215         HReg        r_dst  = newVRegI(env);
2216         Int         argreg;
2217         HWord*      fdescr;
2218
2219         argiregs = 0;
2220         argreg = 0;
2221         argregs[0] = hregPPC_GPR3(mode64);
2222
2223         argiregs |= (1 << (argreg+3));
2224         addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2225                                     iselWordExpr_R(env, e->Iex.Unop.arg) ) );
2226
2227         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2228
2229         fdescr = (HWord*)h_calc_DPBtoBCD;
2230         addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2231                                      argiregs, mk_RetLoc_simple(RLPri_Int) ) );
2232
2233         addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2234         return r_dst;
2235      }
2236
2237      default:
2238         break;
2239      }
2240
2241     switch (e->Iex.Unop.op) {
2242        case Iop_ExtractExpD64: {
2243
2244            HReg fr_dst = newVRegI(env);
2245            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg);
2246            HReg tmp    = newVRegF(env);
2247            PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2248            addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
2249
2250            // put the D64 result into an integer register
2251            sub_from_sp( env, 16 );
2252            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2253            addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2254            add_to_sp( env, 16 );
2255            return fr_dst;
2256         }
2257         case Iop_ExtractExpD128: {
2258            HReg fr_dst = newVRegI(env);
2259            HReg r_srcHi;
2260            HReg r_srcLo;
2261            HReg tmp    = newVRegF(env);
2262            PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2263
2264            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
2265            addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
2266                                                  r_srcHi, r_srcLo));
2267
2268            sub_from_sp( env, 16 );
2269            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2270            addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2271            add_to_sp( env, 16 );
2272            return fr_dst;
2273         }
2274         default:
2275            break;
2276      }
2277
2278      break;
2279   }
2280
2281   /* --------- GET --------- */
2282   case Iex_Get: {
2283      if (ty == Ity_I8  || ty == Ity_I16 ||
2284          ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
2285         HReg r_dst = newVRegI(env);
2286         PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
2287                                          GuestStatePtr(mode64) );
2288         addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
2289                                      r_dst, am_addr, mode64 ));
2290         return r_dst;
2291      }
2292      break;
2293   }
2294
2295   case Iex_GetI: {
2296      PPCAMode* src_am
2297         = genGuestArrayOffset( env, e->Iex.GetI.descr,
2298                                     e->Iex.GetI.ix, e->Iex.GetI.bias );
2299      HReg r_dst = newVRegI(env);
2300      if (mode64 && ty == Ity_I64) {
2301         addInstr(env, PPCInstr_Load( toUChar(8),
2302                                      r_dst, src_am, mode64 ));
2303         return r_dst;
2304      }
2305      if ((!mode64) && ty == Ity_I32) {
2306         addInstr(env, PPCInstr_Load( toUChar(4),
2307                                      r_dst, src_am, mode64 ));
2308         return r_dst;
2309      }
2310      break;
2311   }
2312
2313   /* --------- CCALL --------- */
2314   case Iex_CCall: {
2315      vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
2316
2317      /* be very restrictive for now.  Only 32/64-bit ints allowed for
2318         args, and 32 bits or host machine word for return type. */
2319      if (!(ty == Ity_I32 || (mode64 && ty == Ity_I64)))
2320         goto irreducible;
2321
2322      /* Marshal args, do the call, clear stack. */
2323      UInt   addToSp = 0;
2324      RetLoc rloc    = mk_RetLoc_INVALID();
2325      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2326                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
2327      vassert(is_sane_RetLoc(rloc));
2328      vassert(rloc.pri == RLPri_Int);
2329      vassert(addToSp == 0);
2330
2331      /* GPR3 now holds the helper's integer return value (rloc.pri is RLPri_Int) */
2332      HReg r_dst = newVRegI(env);
2333      addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
2334      return r_dst;
2335   }
2336
2337   /* --------- LITERAL --------- */
2338   /* 32/16/8-bit literals */
2339   case Iex_Const: {
2340      Long l;
2341      HReg r_dst = newVRegI(env);
2342      IRConst* con = e->Iex.Const.con;
2343      switch (con->tag) {
2344         case Ico_U64: if (!mode64) goto irreducible;
2345                       l = (Long)            con->Ico.U64; break;
2346         case Ico_U32: l = (Long)(Int)       con->Ico.U32; break;
2347         case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2348         case Ico_U8:  l = (Long)(Int)(Char )con->Ico.U8;  break;
2349         default:      vpanic("iselIntExpr_R.const(ppc)");
2350      }
2351      addInstr(env, PPCInstr_LI(r_dst, (ULong)l, mode64));
2352      return r_dst;
2353   }
2354
2355   /* --------- MULTIPLEX --------- */
2356   case Iex_ITE: { // VFD
2357      if ((ty == Ity_I8  || ty == Ity_I16 ||
2358           ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
2359          typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
2360         PPCRI* r1    = iselWordExpr_RI(env, e->Iex.ITE.iftrue);
2361         HReg   r0    = iselWordExpr_R(env, e->Iex.ITE.iffalse);
2362         HReg   r_dst = newVRegI(env);
2363         addInstr(env, mk_iMOVds_RR(r_dst,r0));
2364         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
2365         addInstr(env, PPCInstr_CMov(cc, r_dst, r1));
2366         return r_dst;
2367      }
2368      break;
2369   }
2370
2371   default:
2372      break;
2373   } /* switch (e->tag) */
2374
2375
2376   /* We get here if no pattern matched. */
2377 irreducible:
2378   ppIRExpr(e);
2379   vpanic("iselIntExpr_R(ppc): cannot reduce tree");
2380}
2381
2382
2383/*---------------------------------------------------------*/
2384/*--- ISEL: Integer expression auxiliaries              ---*/
2385/*---------------------------------------------------------*/
2386
2387/* --------------------- AMODEs --------------------- */
2388
2389/* Return an AMode which computes the value of the specified
2390   expression, possibly also adding insns to the code list as a
2391   result.  The expression may only be a word-size one.
2392*/
2393
2394static Bool uInt_fits_in_16_bits ( UInt u )
2395{
2396   /* Is u the same as the sign-extend of its lower 16 bits? */
2397   Int i = u & 0xFFFF;
2398   i <<= 16;
2399   i >>= 16;
2400   return toBool(u == (UInt)i);
2401}
2402
2403static Bool uLong_fits_in_16_bits ( ULong u )
2404{
2405   /* Is u the same as the sign-extend of its lower 16 bits? */
2406   Long i = u & 0xFFFFULL;
2407   i <<= 48;
2408   i >>= 48;
2409   return toBool(u == (ULong)i);
2410}
2411
2412static Bool uLong_is_4_aligned ( ULong u )
2413{
2414   return toBool((u & 3ULL) == 0);
2415}
2416
2417static Bool sane_AMode ( ISelEnv* env, PPCAMode* am )
2418{
2419   Bool mode64 = env->mode64;
2420   switch (am->tag) {
2421   case Pam_IR:
2422      /* Using uInt_fits_in_16_bits in 64-bit mode seems a bit bogus,
2423         somehow, but I think it's OK. */
2424      return toBool( hregClass(am->Pam.IR.base) == HRcGPR(mode64) &&
2425                     hregIsVirtual(am->Pam.IR.base) &&
2426                     uInt_fits_in_16_bits(am->Pam.IR.index) );
2427   case Pam_RR:
2428      return toBool( hregClass(am->Pam.RR.base) == HRcGPR(mode64) &&
2429                     hregIsVirtual(am->Pam.RR.base) &&
2430                     hregClass(am->Pam.RR.index) == HRcGPR(mode64) &&
2431                     hregIsVirtual(am->Pam.RR.index) );
2432   default:
2433      vpanic("sane_AMode: unknown ppc amode tag");
2434   }
2435}
2436
2437static
2438PPCAMode* iselWordExpr_AMode ( ISelEnv* env, IRExpr* e, IRType xferTy )
2439{
2440   PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy);
2441   vassert(sane_AMode(env, am));
2442   return am;
2443}
2444
2445/* DO NOT CALL THIS DIRECTLY ! */
2446static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType xferTy )
2447{
2448   IRType ty = typeOfIRExpr(env->type_env,e);
2449
2450   if (env->mode64) {
2451
2452      /* If the data load/store type is I32 or I64, this amode might
2453         be destined for use in ld/ldu/lwa/st/stu.  In which case
2454         insist that if it comes out as an _IR, the immediate must
2455         have its bottom two bits be zero.  This does assume that for
2456         any other type (I8/I16/I128/F32/F64/V128) the amode will not
2457         be parked in any such instruction.  But that seems a
2458         reasonable assumption.  */
2459      Bool aligned4imm = toBool(xferTy == Ity_I32 || xferTy == Ity_I64);
2460
2461      vassert(ty == Ity_I64);
2462
2463      /* Add64(expr,i), where i == sign-extend of (i & 0xFFFF) */
2464      if (e->tag == Iex_Binop
2465          && e->Iex.Binop.op == Iop_Add64
2466          && e->Iex.Binop.arg2->tag == Iex_Const
2467          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
2468          && (aligned4imm  ? uLong_is_4_aligned(e->Iex.Binop.arg2
2469                                                 ->Iex.Const.con->Ico.U64)
2470                           : True)
2471          && uLong_fits_in_16_bits(e->Iex.Binop.arg2
2472                                    ->Iex.Const.con->Ico.U64)) {
2473         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
2474                             iselWordExpr_R(env, e->Iex.Binop.arg1) );
2475      }
2476
2477      /* Add64(expr,expr) */
2478      if (e->tag == Iex_Binop
2479          && e->Iex.Binop.op == Iop_Add64) {
2480         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1);
2481         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2);
2482         return PPCAMode_RR( r_idx, r_base );
2483      }
2484
2485   } else {
2486
2487      vassert(ty == Ity_I32);
2488
2489      /* Add32(expr,i), where i == sign-extend of (i & 0xFFFF) */
2490      if (e->tag == Iex_Binop
2491          && e->Iex.Binop.op == Iop_Add32
2492          && e->Iex.Binop.arg2->tag == Iex_Const
2493          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
2494          && uInt_fits_in_16_bits(e->Iex.Binop.arg2
2495                                   ->Iex.Const.con->Ico.U32)) {
2496         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
2497                             iselWordExpr_R(env, e->Iex.Binop.arg1) );
2498      }
2499
2500      /* Add32(expr,expr) */
2501      if (e->tag == Iex_Binop
2502          && e->Iex.Binop.op == Iop_Add32) {
2503         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1);
2504         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2);
2505         return PPCAMode_RR( r_idx, r_base );
2506      }
2507
2508   }
2509
2510   /* Doesn't match anything in particular.  Generate it into
2511      a register and use that. */
2512   return PPCAMode_IR( 0, iselWordExpr_R(env,e) );
2513}
2514
2515
2516/* --------------------- RH --------------------- */
2517
/* Compute an I8/I16/I32 (and I64, in 64-bit mode) into a RH
   (reg-or-halfword-immediate).  It's important to specify whether the
   immediate is to be regarded as signed or not.  If yes, this will
   never return -32768 as an immediate; this guarantees that all
   signed immediates that are returned can have their sign inverted if
   need be. */
2524
2525static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, IRExpr* e )
2526{
2527   PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e);
2528   /* sanity checks ... */
2529   switch (ri->tag) {
2530   case Prh_Imm:
2531      vassert(ri->Prh.Imm.syned == syned);
2532      if (syned)
2533         vassert(ri->Prh.Imm.imm16 != 0x8000);
2534      return ri;
2535   case Prh_Reg:
2536      vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2537      vassert(hregIsVirtual(ri->Prh.Reg.reg));
2538      return ri;
2539   default:
2540      vpanic("iselIntExpr_RH: unknown ppc RH tag");
2541   }
2542}
2543
2544/* DO NOT CALL THIS DIRECTLY ! */
2545static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, IRExpr* e )
2546{
2547   ULong u;
2548   Long  l;
2549   IRType ty = typeOfIRExpr(env->type_env,e);
2550   vassert(ty == Ity_I8  || ty == Ity_I16 ||
2551           ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2552
2553   /* special case: immediate */
2554   if (e->tag == Iex_Const) {
2555      IRConst* con = e->Iex.Const.con;
2556      /* What value are we aiming to generate? */
2557      switch (con->tag) {
2558      /* Note: Not sign-extending - we carry 'syned' around */
2559      case Ico_U64: vassert(env->mode64);
2560                    u =              con->Ico.U64; break;
2561      case Ico_U32: u = 0xFFFFFFFF & con->Ico.U32; break;
2562      case Ico_U16: u = 0x0000FFFF & con->Ico.U16; break;
2563      case Ico_U8:  u = 0x000000FF & con->Ico.U8; break;
2564      default:      vpanic("iselIntExpr_RH.Iex_Const(ppch)");
2565      }
2566      l = (Long)u;
2567      /* Now figure out if it's representable. */
2568      if (!syned && u <= 65535) {
2569         return PPCRH_Imm(False/*unsigned*/, toUShort(u & 0xFFFF));
2570      }
2571      if (syned && l >= -32767 && l <= 32767) {
2572         return PPCRH_Imm(True/*signed*/, toUShort(u & 0xFFFF));
2573      }
2574      /* no luck; use the Slow Way. */
2575   }
2576
2577   /* default case: calculate into a register and return that */
2578   return PPCRH_Reg( iselWordExpr_R ( env, e ) );
2579}
2580
2581
2582/* --------------------- RIs --------------------- */
2583
/* Calculate an expression into a PPCRI operand.  As with
   iselWordExpr_R, the expression can have type 32, 16 or 8 bits, or,
   in 64-bit mode, 64 bits. */
2587
2588static PPCRI* iselWordExpr_RI ( ISelEnv* env, IRExpr* e )
2589{
2590   PPCRI* ri = iselWordExpr_RI_wrk(env, e);
2591   /* sanity checks ... */
2592   switch (ri->tag) {
2593   case Pri_Imm:
2594      return ri;
2595   case Pri_Reg:
2596      vassert(hregClass(ri->Pri.Reg) == HRcGPR(env->mode64));
2597      vassert(hregIsVirtual(ri->Pri.Reg));
2598      return ri;
2599   default:
2600      vpanic("iselIntExpr_RI: unknown ppc RI tag");
2601   }
2602}
2603
2604/* DO NOT CALL THIS DIRECTLY ! */
2605static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
2606{
2607   Long  l;
2608   IRType ty = typeOfIRExpr(env->type_env,e);
2609   vassert(ty == Ity_I8  || ty == Ity_I16 ||
2610           ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2611
2612   /* special case: immediate */
2613   if (e->tag == Iex_Const) {
2614      IRConst* con = e->Iex.Const.con;
2615      switch (con->tag) {
2616      case Ico_U64: vassert(env->mode64);
2617                    l = (Long)            con->Ico.U64; break;
2618      case Ico_U32: l = (Long)(Int)       con->Ico.U32; break;
2619      case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2620      case Ico_U8:  l = (Long)(Int)(Char )con->Ico.U8;  break;
2621      default:      vpanic("iselIntExpr_RI.Iex_Const(ppch)");
2622      }
2623      return PPCRI_Imm((ULong)l);
2624   }
2625
2626   /* default case: calculate into a register and return that */
2627   return PPCRI_Reg( iselWordExpr_R ( env, e ) );
2628}
2629
2630
2631/* --------------------- RH5u --------------------- */
2632
2633/* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter
2634   being an immediate in the range 1 .. 31 inclusive.  Used for doing
2635   shift amounts.  Only used in 32-bit mode. */
2636
2637static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, IRExpr* e )
2638{
2639   PPCRH* ri;
2640   vassert(!env->mode64);
2641   ri = iselWordExpr_RH5u_wrk(env, e);
2642   /* sanity checks ... */
2643   switch (ri->tag) {
2644   case Prh_Imm:
2645      vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 31);
2646      vassert(!ri->Prh.Imm.syned);
2647      return ri;
2648   case Prh_Reg:
2649      vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2650      vassert(hregIsVirtual(ri->Prh.Reg.reg));
2651      return ri;
2652   default:
2653      vpanic("iselIntExpr_RH5u: unknown ppc RI tag");
2654   }
2655}
2656
2657/* DO NOT CALL THIS DIRECTLY ! */
2658static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e )
2659{
2660   IRType ty = typeOfIRExpr(env->type_env,e);
2661   vassert(ty == Ity_I8);
2662
2663   /* special case: immediate */
2664   if (e->tag == Iex_Const
2665       && e->Iex.Const.con->tag == Ico_U8
2666       && e->Iex.Const.con->Ico.U8 >= 1
2667       && e->Iex.Const.con->Ico.U8 <= 31) {
2668      return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
2669   }
2670
2671   /* default case: calculate into a register and return that */
2672   return PPCRH_Reg( iselWordExpr_R ( env, e ) );
2673}
2674
2675
2676/* --------------------- RH6u --------------------- */
2677
2678/* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter
2679   being an immediate in the range 1 .. 63 inclusive.  Used for doing
2680   shift amounts.  Only used in 64-bit mode. */
2681
2682static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, IRExpr* e )
2683{
2684   PPCRH* ri;
2685   vassert(env->mode64);
2686   ri = iselWordExpr_RH6u_wrk(env, e);
2687   /* sanity checks ... */
2688   switch (ri->tag) {
2689   case Prh_Imm:
2690      vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 63);
2691      vassert(!ri->Prh.Imm.syned);
2692      return ri;
2693   case Prh_Reg:
2694      vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2695      vassert(hregIsVirtual(ri->Prh.Reg.reg));
2696      return ri;
2697   default:
2698      vpanic("iselIntExpr_RH6u: unknown ppc64 RI tag");
2699   }
2700}
2701
2702/* DO NOT CALL THIS DIRECTLY ! */
2703static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e )
2704{
2705   IRType ty = typeOfIRExpr(env->type_env,e);
2706   vassert(ty == Ity_I8);
2707
2708   /* special case: immediate */
2709   if (e->tag == Iex_Const
2710       && e->Iex.Const.con->tag == Ico_U8
2711       && e->Iex.Const.con->Ico.U8 >= 1
2712       && e->Iex.Const.con->Ico.U8 <= 63) {
2713      return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
2714   }
2715
2716   /* default case: calculate into a register and return that */
2717   return PPCRH_Reg( iselWordExpr_R ( env, e ) );
2718}
2719
2720
2721/* --------------------- CONDCODE --------------------- */
2722
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
2726
2727static PPCCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
2728{
2729   /* Uh, there's nothing we can sanity check here, unfortunately. */
2730   return iselCondCode_wrk(env,e);
2731}
2732
2733/* DO NOT CALL THIS DIRECTLY ! */
2734static PPCCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
2735{
2736   vassert(e);
2737   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2738
2739   /* Constant 1:Bit */
2740   if (e->tag == Iex_Const && e->Iex.Const.con->Ico.U1 == True) {
2741      // Make a compare that will always be true:
2742      HReg r_zero = newVRegI(env);
2743      addInstr(env, PPCInstr_LI(r_zero, 0, env->mode64));
2744      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2745                                 7/*cr*/, r_zero, PPCRH_Reg(r_zero)));
2746      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
2747   }
2748
2749   /* Not1(...) */
2750   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2751      /* Generate code for the arg, and negate the test condition */
2752      PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2753      cond.test = invertCondTest(cond.test);
2754      return cond;
2755   }
2756
2757   /* --- patterns rooted at: 32to1 or 64to1 --- */
2758
2759   /* 32to1, 64to1 */
2760   if (e->tag == Iex_Unop &&
2761       (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
2762      HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
2763      HReg tmp = newVRegI(env);
2764      /* could do better, probably -- andi. */
2765      addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
2766                                 src, PPCRH_Imm(False,1)));
2767      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2768                                 7/*cr*/, tmp, PPCRH_Imm(False,1)));
2769      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
2770   }
2771
2772   /* --- patterns rooted at: CmpNEZ8 --- */
2773
2774   /* CmpNEZ8(x) */
2775   /* Note this cloned as CmpNE8(x,0) below. */
2776   /* could do better -- andi. */
2777   if (e->tag == Iex_Unop
2778       && e->Iex.Unop.op == Iop_CmpNEZ8) {
2779      HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg);
2780      HReg tmp = newVRegI(env);
2781      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
2782                                 PPCRH_Imm(False,0xFF)));
2783      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2784                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
2785      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2786   }
2787
2788   /* --- patterns rooted at: CmpNEZ32 --- */
2789
2790   /* CmpNEZ32(x) */
2791   if (e->tag == Iex_Unop
2792       && e->Iex.Unop.op == Iop_CmpNEZ32) {
2793      HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg);
2794      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2795                                 7/*cr*/, r1, PPCRH_Imm(False,0)));
2796      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2797   }
2798
2799   /* --- patterns rooted at: Cmp*32* --- */
2800
2801   /* Cmp*32*(x,y) */
2802   if (e->tag == Iex_Binop
2803       && (e->Iex.Binop.op == Iop_CmpEQ32
2804           || e->Iex.Binop.op == Iop_CmpNE32
2805           || e->Iex.Binop.op == Iop_CmpLT32S
2806           || e->Iex.Binop.op == Iop_CmpLT32U
2807           || e->Iex.Binop.op == Iop_CmpLE32S
2808           || e->Iex.Binop.op == Iop_CmpLE32U)) {
2809      Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
2810                    e->Iex.Binop.op == Iop_CmpLE32S);
2811      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1);
2812      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
2813      addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
2814                                 7/*cr*/, r1, ri2));
2815
2816      switch (e->Iex.Binop.op) {
2817      case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
2818      case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2819      case Iop_CmpLT32U: case Iop_CmpLT32S:
2820         return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
2821      case Iop_CmpLE32U: case Iop_CmpLE32S:
2822         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
2823      default: vpanic("iselCondCode(ppc): CmpXX32");
2824      }
2825   }
2826
2827   /* --- patterns rooted at: CmpNEZ64 --- */
2828
2829   /* CmpNEZ64 */
2830   if (e->tag == Iex_Unop
2831       && e->Iex.Unop.op == Iop_CmpNEZ64) {
2832      if (!env->mode64) {
2833         HReg hi, lo;
2834         HReg tmp = newVRegI(env);
2835         iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
2836         addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
2837         addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
2838                                    7/*cr*/, tmp,PPCRH_Imm(False,0)));
2839         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2840      } else {  // mode64
2841         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
2842         addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
2843                                    7/*cr*/, r_src,PPCRH_Imm(False,0)));
2844         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2845      }
2846   }
2847
2848   /* --- patterns rooted at: Cmp*64* --- */
2849
2850   /* Cmp*64*(x,y) */
2851   if (e->tag == Iex_Binop
2852       && (e->Iex.Binop.op == Iop_CmpEQ64
2853           || e->Iex.Binop.op == Iop_CmpNE64
2854           || e->Iex.Binop.op == Iop_CmpLT64S
2855           || e->Iex.Binop.op == Iop_CmpLT64U
2856           || e->Iex.Binop.op == Iop_CmpLE64S
2857           || e->Iex.Binop.op == Iop_CmpLE64U)) {
2858      Bool   syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
2859                      e->Iex.Binop.op == Iop_CmpLE64S);
2860      HReg    r1 = iselWordExpr_R(env, e->Iex.Binop.arg1);
2861      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
2862      vassert(env->mode64);
2863      addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
2864                                 7/*cr*/, r1, ri2));
2865
2866      switch (e->Iex.Binop.op) {
2867      case Iop_CmpEQ64:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
2868      case Iop_CmpNE64:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2869      case Iop_CmpLT64U: return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
2870      case Iop_CmpLE64U: return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
2871      default: vpanic("iselCondCode(ppc): CmpXX64");
2872      }
2873   }
2874
2875   /* --- patterns rooted at: CmpNE8 --- */
2876
2877   /* CmpNE8(x,0) */
2878   /* Note this is a direct copy of CmpNEZ8 above. */
2879   /* could do better -- andi. */
2880   if (e->tag == Iex_Binop
2881       && e->Iex.Binop.op == Iop_CmpNE8
2882       && isZeroU8(e->Iex.Binop.arg2)) {
2883      HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1);
2884      HReg tmp = newVRegI(env);
2885      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
2886                                 PPCRH_Imm(False,0xFF)));
2887      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2888                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
2889      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2890   }
2891
2892   /* var */
2893   if (e->tag == Iex_RdTmp) {
2894      HReg r_src      = lookupIRTemp(env, e->Iex.RdTmp.tmp);
2895      HReg src_masked = newVRegI(env);
2896      addInstr(env,
2897               PPCInstr_Alu(Palu_AND, src_masked,
2898                            r_src, PPCRH_Imm(False,1)));
2899      addInstr(env,
2900               PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2901                            7/*cr*/, src_masked, PPCRH_Imm(False,1)));
2902      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
2903   }
2904
2905   vex_printf("iselCondCode(ppc): No such tag(%u)\n", e->tag);
2906   ppIRExpr(e);
2907   vpanic("iselCondCode(ppc)");
2908}
2909
2910
2911/*---------------------------------------------------------*/
2912/*--- ISEL: Integer expressions (128 bit)               ---*/
2913/*---------------------------------------------------------*/
2914
2915/* 64-bit mode ONLY: compute a 128-bit value into a register pair,
2916   which is returned as the first two parameters.  As with
2917   iselWordExpr_R, these may be either real or virtual regs; in any
2918   case they must not be changed by subsequent code emitted by the
2919   caller.  */
2920
2921static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2922                             ISelEnv* env, IRExpr* e )
2923{
2924   vassert(env->mode64);
2925   iselInt128Expr_wrk(rHi, rLo, env, e);
2926#  if 0
2927   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2928#  endif
2929   vassert(hregClass(*rHi) == HRcGPR(env->mode64));
2930   vassert(hregIsVirtual(*rHi));
2931   vassert(hregClass(*rLo) == HRcGPR(env->mode64));
2932   vassert(hregIsVirtual(*rLo));
2933}
2934
2935/* DO NOT CALL THIS DIRECTLY ! */
2936static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2937                                 ISelEnv* env, IRExpr* e )
2938{
2939   vassert(e);
2940   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2941
2942   /* read 128-bit IRTemp */
2943   if (e->tag == Iex_RdTmp) {
2944      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
2945      return;
2946   }
2947
2948   /* --------- BINARY ops --------- */
2949   if (e->tag == Iex_Binop) {
2950      switch (e->Iex.Binop.op) {
2951      /* 64 x 64 -> 128 multiply */
2952      case Iop_MullU64:
2953      case Iop_MullS64: {
2954         HReg     tLo     = newVRegI(env);
2955         HReg     tHi     = newVRegI(env);
2956         Bool     syned   = toBool(e->Iex.Binop.op == Iop_MullS64);
2957         HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
2958         HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2);
2959         addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
2960                                     False/*lo64*/, False/*64bit mul*/,
2961                                     tLo, r_srcL, r_srcR));
2962         addInstr(env, PPCInstr_MulL(syned,
2963                                     True/*hi64*/, False/*64bit mul*/,
2964                                     tHi, r_srcL, r_srcR));
2965         *rHi = tHi;
2966         *rLo = tLo;
2967         return;
2968      }
2969
2970      /* 64HLto128(e1,e2) */
2971      case Iop_64HLto128:
2972         *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
2973         *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
2974         return;
2975      default:
2976         break;
2977      }
2978   } /* if (e->tag == Iex_Binop) */
2979
2980
2981   /* --------- UNARY ops --------- */
2982   if (e->tag == Iex_Unop) {
2983      switch (e->Iex.Unop.op) {
2984      default:
2985         break;
2986      }
2987   } /* if (e->tag == Iex_Unop) */
2988
2989   vex_printf("iselInt128Expr(ppc64): No such tag(%u)\n", e->tag);
2990   ppIRExpr(e);
2991   vpanic("iselInt128Expr(ppc64)");
2992}
2993
2994
2995/*---------------------------------------------------------*/
2996/*--- ISEL: Integer expressions (64 bit)                ---*/
2997/*---------------------------------------------------------*/
2998
2999/* 32-bit mode ONLY: compute a 128-bit value into a register quad */
3000static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi, HReg* rMedLo,
3001                                     HReg* rLo, ISelEnv* env, IRExpr* e )
3002{
3003   vassert(!env->mode64);
3004   iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e);
3005#  if 0
3006   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3007#  endif
3008   vassert(hregClass(*rHi) == HRcInt32);
3009   vassert(hregIsVirtual(*rHi));
3010   vassert(hregClass(*rMedHi) == HRcInt32);
3011   vassert(hregIsVirtual(*rMedHi));
3012   vassert(hregClass(*rMedLo) == HRcInt32);
3013   vassert(hregIsVirtual(*rMedLo));
3014   vassert(hregClass(*rLo) == HRcInt32);
3015   vassert(hregIsVirtual(*rLo));
3016}
3017
3018static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
3019                                         HReg* rMedLo, HReg* rLo,
3020                                         ISelEnv* env, IRExpr* e )
3021{
3022   vassert(e);
3023   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3024
3025   /* read 128-bit IRTemp */
3026   if (e->tag == Iex_RdTmp) {
3027      lookupIRTempQuad( rHi, rMedHi, rMedLo, rLo, env, e->Iex.RdTmp.tmp);
3028      return;
3029   }
3030
3031   if (e->tag == Iex_Binop) {
3032
3033      IROp op_binop = e->Iex.Binop.op;
3034      switch (op_binop) {
3035      case Iop_64HLto128:
3036         iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1);
3037         iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2);
3038         return;
3039      default:
3040         vex_printf("iselInt128Expr_to_32x4_wrk: Binop case 0x%x not found\n",
3041                    op_binop);
3042         break;
3043      }
3044   }
3045
3046   vex_printf("iselInt128Expr_to_32x4_wrk: e->tag 0x%x not found\n", e->tag);
3047   return;
3048}
3049
3050/* 32-bit mode ONLY: compute a 64-bit value into a register pair,
3051   which is returned as the first two parameters.  As with
3052   iselIntExpr_R, these may be either real or virtual regs; in any
3053   case they must not be changed by subsequent code emitted by the
3054   caller.  */
3055
3056static void iselInt64Expr ( HReg* rHi, HReg* rLo,
3057                            ISelEnv* env, IRExpr* e )
3058{
3059   vassert(!env->mode64);
3060   iselInt64Expr_wrk(rHi, rLo, env, e);
3061#  if 0
3062   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3063#  endif
3064   vassert(hregClass(*rHi) == HRcInt32);
3065   vassert(hregIsVirtual(*rHi));
3066   vassert(hregClass(*rLo) == HRcInt32);
3067   vassert(hregIsVirtual(*rLo));
3068}
3069
3070/* DO NOT CALL THIS DIRECTLY ! */
3071static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
3072                                ISelEnv* env, IRExpr* e )
3073{
3074   vassert(e);
3075   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
3076
3077   /* 64-bit load */
3078   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
3079      HReg tLo    = newVRegI(env);
3080      HReg tHi    = newVRegI(env);
3081      HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr);
3082      vassert(!env->mode64);
3083      addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3084                                   tHi, PPCAMode_IR( 0, r_addr ),
3085                                   False/*32-bit insn please*/) );
3086      addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3087                                   tLo, PPCAMode_IR( 4, r_addr ),
3088                                   False/*32-bit insn please*/) );
3089      *rHi = tHi;
3090      *rLo = tLo;
3091      return;
3092   }
3093
3094   /* 64-bit literal */
3095   if (e->tag == Iex_Const) {
3096      ULong w64 = e->Iex.Const.con->Ico.U64;
3097      UInt  wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
3098      UInt  wLo = ((UInt)w64) & 0xFFFFFFFF;
3099      HReg  tLo = newVRegI(env);
3100      HReg  tHi = newVRegI(env);
3101      vassert(e->Iex.Const.con->tag == Ico_U64);
3102      addInstr(env, PPCInstr_LI(tHi, (Long)(Int)wHi, False/*mode32*/));
3103      addInstr(env, PPCInstr_LI(tLo, (Long)(Int)wLo, False/*mode32*/));
3104      *rHi = tHi;
3105      *rLo = tLo;
3106      return;
3107   }
3108
3109   /* read 64-bit IRTemp */
3110   if (e->tag == Iex_RdTmp) {
3111      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3112      return;
3113   }
3114
3115   /* 64-bit GET */
3116   if (e->tag == Iex_Get) {
3117      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3118                                       GuestStatePtr(False/*mode32*/) );
3119      PPCAMode* am_addr4 = advance4(env, am_addr);
3120      HReg tLo = newVRegI(env);
3121      HReg tHi = newVRegI(env);
3122      addInstr(env, PPCInstr_Load( 4, tHi, am_addr,  False/*mode32*/ ));
3123      addInstr(env, PPCInstr_Load( 4, tLo, am_addr4, False/*mode32*/ ));
3124      *rHi = tHi;
3125      *rLo = tLo;
3126      return;
3127   }
3128
3129   /* 64-bit ITE */
3130   if (e->tag == Iex_ITE) { // VFD
3131      HReg e0Lo, e0Hi, eXLo, eXHi;
3132      iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue);
3133      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
3134      HReg tLo = newVRegI(env);
3135      HReg tHi = newVRegI(env);
3136      addInstr(env, mk_iMOVds_RR(tHi,e0Hi));
3137      addInstr(env, mk_iMOVds_RR(tLo,e0Lo));
3138      PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
3139      addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(eXHi)));
3140      addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(eXLo)));
3141      *rHi = tHi;
3142      *rLo = tLo;
3143      return;
3144   }
3145
3146   /* --------- BINARY ops --------- */
3147   if (e->tag == Iex_Binop) {
3148      IROp op_binop = e->Iex.Binop.op;
3149      switch (op_binop) {
3150         /* 32 x 32 -> 64 multiply */
3151         case Iop_MullU32:
3152         case Iop_MullS32: {
3153            HReg     tLo     = newVRegI(env);
3154            HReg     tHi     = newVRegI(env);
3155            Bool     syned   = toBool(op_binop == Iop_MullS32);
3156            HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
3157            HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2);
3158            addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3159                                        False/*lo32*/, True/*32bit mul*/,
3160                                        tLo, r_srcL, r_srcR));
3161            addInstr(env, PPCInstr_MulL(syned,
3162                                        True/*hi32*/, True/*32bit mul*/,
3163                                        tHi, r_srcL, r_srcR));
3164            *rHi = tHi;
3165            *rLo = tLo;
3166            return;
3167         }
3168
3169         /* Or64/And64/Xor64 */
3170         case Iop_Or64:
3171         case Iop_And64:
3172         case Iop_Xor64: {
3173            HReg xLo, xHi, yLo, yHi;
3174            HReg tLo = newVRegI(env);
3175            HReg tHi = newVRegI(env);
3176            PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
3177                          (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
3178            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
3179            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
3180            addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
3181            addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
3182            *rHi = tHi;
3183            *rLo = tLo;
3184            return;
3185         }
3186
3187         /* Add64 */
3188         case Iop_Add64: {
3189            HReg xLo, xHi, yLo, yHi;
3190            HReg tLo = newVRegI(env);
3191            HReg tHi = newVRegI(env);
3192            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
3193            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
3194            addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
3195                                            tLo, xLo, yLo));
3196            addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
3197                                            tHi, xHi, yHi));
3198            *rHi = tHi;
3199            *rLo = tLo;
3200            return;
3201         }
3202
3203         /* 32HLto64(e1,e2) */
3204         case Iop_32HLto64:
3205            *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
3206            *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
3207            return;
3208
3209         /* F64toI64[S|U] */
3210         case Iop_F64toI64S: case Iop_F64toI64U: {
3211            HReg      tLo     = newVRegI(env);
3212            HReg      tHi     = newVRegI(env);
3213            HReg      r1      = StackFramePtr(env->mode64);
3214            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3215            PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3216            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2);
3217            HReg      ftmp    = newVRegF(env);
3218
3219            vassert(!env->mode64);
3220            /* Set host rounding mode */
3221            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3222
3223            sub_from_sp( env, 16 );
3224            addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
3225                                          (op_binop == Iop_F64toI64S) ? True : False,
3226                                          True, ftmp, fsrc));
3227            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3228            addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3229            addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3230            add_to_sp( env, 16 );
3231
3232            ///* Restore default FPU rounding. */
3233            //set_FPU_rounding_default( env );
3234            *rHi = tHi;
3235            *rLo = tLo;
3236            return;
3237         }
3238         case Iop_D64toI64S: {
3239            HReg      tLo     = newVRegI(env);
3240            HReg      tHi     = newVRegI(env);
3241            HReg      r1      = StackFramePtr(env->mode64);
3242            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3243            PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3244            HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
3245            HReg tmp    = newVRegF(env);
3246
3247            vassert(!env->mode64);
3248            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
3249            addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, tmp, fr_src));
3250
3251            sub_from_sp( env, 16 );
3252            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3253            addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3254            addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3255            add_to_sp( env, 16 );
3256            *rHi = tHi;
3257            *rLo = tLo;
3258            return;
3259         }
3260         case Iop_D128toI64S: {
3261            PPCFpOp fpop = Pfp_DCTFIXQ;
3262            HReg r_srcHi = newVRegF(env);
3263            HReg r_srcLo = newVRegF(env);
3264            HReg tLo     = newVRegI(env);
3265            HReg tHi     = newVRegI(env);
3266            HReg ftmp    = newVRegF(env);
3267            PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3268            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3269
3270            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
3271            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
3272            addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
3273
3274            // put the D64 result into an integer register pair
3275            sub_from_sp( env, 16 );
3276            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3277            addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3278            addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3279            add_to_sp( env, 16 );
3280            *rHi = tHi;
3281            *rLo = tLo;
3282            return;
3283         }
3284         default:
3285            break;
3286      }
3287   } /* if (e->tag == Iex_Binop) */
3288
3289
3290   /* --------- UNARY ops --------- */
3291   if (e->tag == Iex_Unop) {
3292      switch (e->Iex.Unop.op) {
3293
3294      /* CmpwNEZ64(e) */
3295      case Iop_CmpwNEZ64: {
3296         HReg argHi, argLo;
3297         HReg tmp1  = newVRegI(env);
3298         HReg tmp2  = newVRegI(env);
3299         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
3300         /* tmp1 = argHi | argLo */
3301         addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
3302         /* tmp2 = (tmp1 | -tmp1) >>s 31 */
3303         addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1));
3304         addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1)));
3305         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3306                                     tmp2, tmp2, PPCRH_Imm(False, 31)));
3307         *rHi = tmp2;
3308         *rLo = tmp2; /* yes, really tmp2 */
3309         return;
3310      }
3311
3312      /* Left64 */
3313      case Iop_Left64: {
3314         HReg argHi, argLo;
3315         HReg zero32 = newVRegI(env);
3316         HReg resHi  = newVRegI(env);
3317         HReg resLo  = newVRegI(env);
3318         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
3319         vassert(env->mode64 == False);
3320         addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
3321         /* resHi:resLo = - argHi:argLo */
3322         addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
3323                                         resLo, zero32, argLo ));
3324         addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
3325                                         resHi, zero32, argHi ));
3326         /* resHi:resLo |= srcHi:srcLo */
3327         addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
3328         addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
3329         *rHi = resHi;
3330         *rLo = resLo;
3331         return;
3332      }
3333
3334      /* 32Sto64(e) */
3335      case Iop_32Sto64: {
3336         HReg tHi = newVRegI(env);
3337         HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
3338         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3339                                     tHi, src, PPCRH_Imm(False,31)));
3340         *rHi = tHi;
3341         *rLo = src;
3342         return;
3343      }
3344      case Iop_ExtractExpD64: {
3345         HReg tmp    = newVRegF(env);
3346         HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg);
3347         HReg      tLo     = newVRegI(env);
3348         HReg      tHi     = newVRegI(env);
3349         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3350         PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3351
3352         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
3353
3354         // put the D64 result into a integer register pair
3355         sub_from_sp( env, 16 );
3356         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3357         addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3358         addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3359         add_to_sp( env, 16 );
3360         *rHi = tHi;
3361         *rLo = tLo;
3362         return;
3363      }
3364      case Iop_ExtractExpD128: {
3365         HReg      r_srcHi;
3366         HReg      r_srcLo;
3367         HReg      tmp     = newVRegF(env);
3368         HReg      tLo     = newVRegI(env);
3369         HReg      tHi     = newVRegI(env);
3370         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3371         PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3372
3373         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
3374         addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
3375                                                  r_srcHi, r_srcLo));
3376
3377         // put the D64 result into a integer register pair
3378         sub_from_sp( env, 16 );
3379         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3380         addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3381         addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3382         add_to_sp( env, 16 );
3383         *rHi = tHi;
3384         *rLo = tLo;
3385         return;
3386      }
3387
3388      /* 32Uto64(e) */
3389      case Iop_32Uto64: {
3390         HReg tHi = newVRegI(env);
3391         HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg);
3392         addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
3393         *rHi = tHi;
3394         *rLo = tLo;
3395         return;
3396      }
3397
3398      case Iop_128to64: {
3399         /* Narrow, return the low 64-bit half as a 32-bit
3400          * register pair */
3401         HReg r_Hi    = INVALID_HREG;
3402         HReg r_MedHi = INVALID_HREG;
3403         HReg r_MedLo = INVALID_HREG;
3404         HReg r_Lo    = INVALID_HREG;
3405
3406         iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3407                                env, e->Iex.Unop.arg);
3408         *rHi = r_MedLo;
3409         *rLo = r_Lo;
3410         return;
3411      }
3412
3413      case Iop_128HIto64: {
3414         /* Narrow, return the high 64-bit half as a 32-bit
3415          *  register pair */
3416         HReg r_Hi    = INVALID_HREG;
3417         HReg r_MedHi = INVALID_HREG;
3418         HReg r_MedLo = INVALID_HREG;
3419         HReg r_Lo    = INVALID_HREG;
3420
3421         iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3422                                env, e->Iex.Unop.arg);
3423         *rHi = r_Hi;
3424         *rLo = r_MedHi;
3425         return;
3426      }
3427
3428      /* V128{HI}to64 */
3429      case Iop_V128HIto64:
3430      case Iop_V128to64: {
3431         HReg r_aligned16;
3432         Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
3433         HReg tLo = newVRegI(env);
3434         HReg tHi = newVRegI(env);
3435         HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
3436         PPCAMode *am_off0, *am_offLO, *am_offHI;
3437         sub_from_sp( env, 32 );     // Move SP down 32 bytes
3438
3439         // get a quadword aligned address within our stack space
3440         r_aligned16 = get_sp_aligned16( env );
3441         am_off0  = PPCAMode_IR( 0,     r_aligned16 );
3442         am_offHI = PPCAMode_IR( off,   r_aligned16 );
3443         am_offLO = PPCAMode_IR( off+4, r_aligned16 );
3444
3445         // store as Vec128
3446         addInstr(env,
3447                  PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
3448
3449         // load hi,lo words (of hi/lo half of vec) as Ity_I32's
3450         addInstr(env,
3451                  PPCInstr_Load( 4, tHi, am_offHI, False/*mode32*/ ));
3452         addInstr(env,
3453                  PPCInstr_Load( 4, tLo, am_offLO, False/*mode32*/ ));
3454
3455         add_to_sp( env, 32 );       // Reset SP
3456         *rHi = tHi;
3457         *rLo = tLo;
3458         return;
3459      }
3460
3461      /* could do better than this, but for now ... */
3462      case Iop_1Sto64: {
3463         HReg tLo = newVRegI(env);
3464         HReg tHi = newVRegI(env);
3465         PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
3466         addInstr(env, PPCInstr_Set(cond,tLo));
3467         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
3468                                     tLo, tLo, PPCRH_Imm(False,31)));
3469         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3470                                     tLo, tLo, PPCRH_Imm(False,31)));
3471         addInstr(env, mk_iMOVds_RR(tHi, tLo));
3472         *rHi = tHi;
3473         *rLo = tLo;
3474         return;
3475      }
3476
3477      case Iop_Not64: {
3478         HReg xLo, xHi;
3479         HReg tmpLo = newVRegI(env);
3480         HReg tmpHi = newVRegI(env);
3481         iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
3482         addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
3483         addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
3484         *rHi = tmpHi;
3485         *rLo = tmpLo;
3486         return;
3487      }
3488
3489      /* ReinterpF64asI64(e) */
3490      /* Given an IEEE754 double, produce an I64 with the same bit
3491         pattern. */
3492      case Iop_ReinterpF64asI64: {
3493         PPCAMode *am_addr0, *am_addr1;
3494         HReg fr_src  = iselDblExpr(env, e->Iex.Unop.arg);
3495         HReg r_dstLo = newVRegI(env);
3496         HReg r_dstHi = newVRegI(env);
3497
3498         sub_from_sp( env, 16 );     // Move SP down 16 bytes
3499         am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
3500         am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
3501
3502         // store as F64
3503         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
3504                                        fr_src, am_addr0 ));
3505
3506         // load hi,lo as Ity_I32's
3507         addInstr(env, PPCInstr_Load( 4, r_dstHi,
3508                                      am_addr0, False/*mode32*/ ));
3509         addInstr(env, PPCInstr_Load( 4, r_dstLo,
3510                                      am_addr1, False/*mode32*/ ));
3511         *rHi = r_dstHi;
3512         *rLo = r_dstLo;
3513
3514         add_to_sp( env, 16 );       // Reset SP
3515         return;
3516      }
3517
3518      case Iop_ReinterpD64asI64: {
3519         HReg fr_src  = iselDfp64Expr(env, e->Iex.Unop.arg);
3520         PPCAMode *am_addr0, *am_addr1;
3521         HReg r_dstLo = newVRegI(env);
3522         HReg r_dstHi = newVRegI(env);
3523
3524
3525         sub_from_sp( env, 16 );     // Move SP down 16 bytes
3526         am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
3527         am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
3528
3529         // store as D64
3530         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
3531                                        fr_src, am_addr0 ));
3532
3533         // load hi,lo as Ity_I32's
3534         addInstr(env, PPCInstr_Load( 4, r_dstHi,
3535                                      am_addr0, False/*mode32*/ ));
3536         addInstr(env, PPCInstr_Load( 4, r_dstLo,
3537                                      am_addr1, False/*mode32*/ ));
3538         *rHi = r_dstHi;
3539         *rLo = r_dstLo;
3540
3541         add_to_sp( env, 16 );       // Reset SP
3542
3543         return;
3544      }
3545
3546      case Iop_BCDtoDPB: {
3547         PPCCondCode cc;
3548         UInt        argiregs;
3549         HReg        argregs[2];
3550         Int         argreg;
3551         HReg        tLo = newVRegI(env);
3552         HReg        tHi = newVRegI(env);
3553         HReg        tmpHi;
3554         HReg        tmpLo;
3555         ULong       target;
3556         Bool        mode64 = env->mode64;
3557
3558         argregs[0] = hregPPC_GPR3(mode64);
3559         argregs[1] = hregPPC_GPR4(mode64);
3560
3561         argiregs = 0;
3562         argreg = 0;
3563
3564         iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg );
3565
3566         argiregs |= ( 1 << (argreg+3 ) );
3567         addInstr( env, mk_iMOVds_RR( argregs[argreg++], tmpHi ) );
3568
3569         argiregs |= ( 1 << (argreg+3 ) );
3570         addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
3571
3572         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
3573         target = toUInt( Ptr_to_ULong(h_calc_BCDtoDPB ) );
3574
3575         addInstr( env, PPCInstr_Call( cc, (Addr64)target,
3576                                       argiregs,
3577                                       mk_RetLoc_simple(RLPri_2Int) ) );
3578         addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
3579         addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
3580
3581         *rHi = tHi;
3582         *rLo = tLo;
3583         return;
3584      }
3585
3586      case Iop_DPBtoBCD: {
3587         PPCCondCode cc;
3588         UInt        argiregs;
3589         HReg        argregs[2];
3590         Int         argreg;
3591         HReg        tLo = newVRegI(env);
3592         HReg        tHi = newVRegI(env);
3593         HReg        tmpHi;
3594         HReg        tmpLo;
3595         ULong       target;
3596         Bool        mode64 = env->mode64;
3597
3598         argregs[0] = hregPPC_GPR3(mode64);
3599         argregs[1] = hregPPC_GPR4(mode64);
3600
3601         argiregs = 0;
3602         argreg = 0;
3603
3604         iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg);
3605
3606         argiregs |= (1 << (argreg+3));
3607         addInstr(env, mk_iMOVds_RR( argregs[argreg++], tmpHi ));
3608
3609         argiregs |= (1 << (argreg+3));
3610         addInstr(env, mk_iMOVds_RR( argregs[argreg], tmpLo));
3611
3612         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
3613
3614         target = toUInt( Ptr_to_ULong( h_calc_DPBtoBCD ) );
3615
3616         addInstr(env, PPCInstr_Call( cc, (Addr64)target, argiregs,
3617                                      mk_RetLoc_simple(RLPri_2Int) ) );
3618         addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
3619         addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
3620
3621         *rHi = tHi;
3622         *rLo = tLo;
3623         return;
3624      }
3625
3626      default:
3627         break;
3628      }
3629   } /* if (e->tag == Iex_Unop) */
3630
3631   vex_printf("iselInt64Expr(ppc): No such tag(%u)\n", e->tag);
3632   ppIRExpr(e);
3633   vpanic("iselInt64Expr(ppc)");
3634}
3635
3636
3637/*---------------------------------------------------------*/
3638/*--- ISEL: Floating point expressions (32 bit)         ---*/
3639/*---------------------------------------------------------*/
3640
3641/* Nothing interesting here; really just wrappers for
3642   64-bit stuff. */
3643
3644static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3645{
3646   HReg r = iselFltExpr_wrk( env, e );
3647#  if 0
3648   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3649#  endif
3650   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
3651   vassert(hregIsVirtual(r));
3652   return r;
3653}
3654
3655/* DO NOT CALL THIS DIRECTLY */
3656static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3657{
3658   Bool        mode64 = env->mode64;
3659
3660   IRType ty = typeOfIRExpr(env->type_env,e);
3661   vassert(ty == Ity_F32);
3662
3663   if (e->tag == Iex_RdTmp) {
3664      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3665   }
3666
3667   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
3668      PPCAMode* am_addr;
3669      HReg r_dst = newVRegF(env);
3670      vassert(e->Iex.Load.ty == Ity_F32);
3671      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/);
3672      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
3673      return r_dst;
3674   }
3675
3676   if (e->tag == Iex_Get) {
3677      HReg r_dst = newVRegF(env);
3678      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3679                                       GuestStatePtr(env->mode64) );
3680      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4, r_dst, am_addr ));
3681      return r_dst;
3682   }
3683
3684   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
3685      /* This is quite subtle.  The only way to do the relevant
3686         truncation is to do a single-precision store and then a
3687         double precision load to get it back into a register.  The
3688         problem is, if the data is then written to memory a second
3689         time, as in
3690
3691            STbe(...) = TruncF64asF32(...)
3692
3693         then will the second truncation further alter the value?  The
3694         answer is no: flds (as generated here) followed by fsts
3695         (generated for the STbe) is the identity function on 32-bit
3696         floats, so we are safe.
3697
3698         Another upshot of this is that if iselStmt can see the
3699         entirety of
3700
3701            STbe(...) = TruncF64asF32(arg)
3702
3703         then it can short circuit having to deal with TruncF64asF32
3704         individually; instead just compute arg into a 64-bit FP
3705         register and do 'fsts' (since that itself does the
3706         truncation).
3707
3708         We generate pretty poor code here (should be ok both for
3709         32-bit and 64-bit mode); but it is expected that for the most
3710         part the latter optimisation will apply and hence this code
3711         will not often be used.
3712      */
3713      HReg      fsrc    = iselDblExpr(env, e->Iex.Unop.arg);
3714      HReg      fdst    = newVRegF(env);
3715      PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3716
3717      sub_from_sp( env, 16 );
3718      // store as F32, hence truncating
3719      addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
3720                                     fsrc, zero_r1 ));
3721      // and reload.  Good huh?! (sigh)
3722      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
3723                                     fdst, zero_r1 ));
3724      add_to_sp( env, 16 );
3725      return fdst;
3726   }
3727
3728   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
3729      if (mode64) {
3730         HReg fdst = newVRegF(env);
3731         HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2);
3732         HReg r1   = StackFramePtr(env->mode64);
3733         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3734
3735         /* Set host rounding mode */
3736         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3737
3738         sub_from_sp( env, 16 );
3739
3740         addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
3741         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
3742         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
3743                                       False, False,
3744                                       fdst, fdst));
3745
3746         add_to_sp( env, 16 );
3747
3748         ///* Restore default FPU rounding. */
3749         //set_FPU_rounding_default( env );
3750         return fdst;
3751      } else {
3752         /* 32-bit mode */
3753         HReg fdst = newVRegF(env);
3754         HReg isrcHi, isrcLo;
3755         HReg r1   = StackFramePtr(env->mode64);
3756         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3757         PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3758
3759         iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2);
3760
3761         /* Set host rounding mode */
3762         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3763
3764         sub_from_sp( env, 16 );
3765
3766         addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
3767         addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
3768         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
3769         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
3770                                       False, False,
3771                                       fdst, fdst));
3772
3773         add_to_sp( env, 16 );
3774
3775         ///* Restore default FPU rounding. */
3776         //set_FPU_rounding_default( env );
3777         return fdst;
3778      }
3779
3780   }
3781
3782   vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
3783   ppIRExpr(e);
3784   vpanic("iselFltExpr_wrk(ppc)");
3785}
3786
3787
3788/*---------------------------------------------------------*/
3789/*--- ISEL: Floating point expressions (64 bit)         ---*/
3790/*---------------------------------------------------------*/
3791
/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  The register handed back by iselDblExpr is
   always a virtual register of class HRcFlt64 (both properties are
   asserted there); it must not be changed by subsequent code emitted
   by the caller.  */
3796
3797/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
3798
3799    Type                  S (1 bit)   E (11 bits)   F (52 bits)
3800    ----                  ---------   -----------   -----------
3801    signalling NaN        u           2047 (max)    .0uuuuu---u
3802                                                    (with at least
3803                                                     one 1 bit)
3804    quiet NaN             u           2047 (max)    .1uuuuu---u
3805
3806    negative infinity     1           2047 (max)    .000000---0
3807
3808    positive infinity     0           2047 (max)    .000000---0
3809
3810    negative zero         1           0             .000000---0
3811
3812    positive zero         0           0             .000000---0
3813*/
3814
3815static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
3816{
3817   HReg r = iselDblExpr_wrk( env, e );
3818#  if 0
3819   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3820#  endif
3821   vassert(hregClass(r) == HRcFlt64);
3822   vassert(hregIsVirtual(r));
3823   return r;
3824}
3825
3826/* DO NOT CALL THIS DIRECTLY */
3827static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
3828{
3829   Bool mode64 = env->mode64;
3830   IRType ty = typeOfIRExpr(env->type_env,e);
3831   vassert(e);
3832   vassert(ty == Ity_F64);
3833
3834   if (e->tag == Iex_RdTmp) {
3835      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3836   }
3837
3838   /* --------- LITERAL --------- */
3839   if (e->tag == Iex_Const) {
3840      union { UInt u32x2[2]; ULong u64; Double f64; } u;
3841      vassert(sizeof(u) == 8);
3842      vassert(sizeof(u.u64) == 8);
3843      vassert(sizeof(u.f64) == 8);
3844      vassert(sizeof(u.u32x2) == 8);
3845
3846      if (e->Iex.Const.con->tag == Ico_F64) {
3847         u.f64 = e->Iex.Const.con->Ico.F64;
3848      }
3849      else if (e->Iex.Const.con->tag == Ico_F64i) {
3850         u.u64 = e->Iex.Const.con->Ico.F64i;
3851      }
3852      else
3853         vpanic("iselDblExpr(ppc): const");
3854
3855      if (!mode64) {
3856         HReg r_srcHi = newVRegI(env);
3857         HReg r_srcLo = newVRegI(env);
3858         addInstr(env, PPCInstr_LI(r_srcHi, u.u32x2[0], mode64));
3859         addInstr(env, PPCInstr_LI(r_srcLo, u.u32x2[1], mode64));
3860         return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
3861      } else { // mode64
3862         HReg r_src = newVRegI(env);
3863         addInstr(env, PPCInstr_LI(r_src, u.u64, mode64));
3864         return mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
3865      }
3866   }
3867
3868   /* --------- LOAD --------- */
3869   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
3870      HReg r_dst = newVRegF(env);
3871      PPCAMode* am_addr;
3872      vassert(e->Iex.Load.ty == Ity_F64);
3873      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/);
3874      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
3875      return r_dst;
3876   }
3877
3878   /* --------- GET --------- */
3879   if (e->tag == Iex_Get) {
3880      HReg r_dst = newVRegF(env);
3881      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3882                                       GuestStatePtr(mode64) );
3883      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ));
3884      return r_dst;
3885   }
3886
3887   /* --------- OPS --------- */
3888   if (e->tag == Iex_Qop) {
3889      PPCFpOp fpop = Pfp_INVALID;
3890      switch (e->Iex.Qop.details->op) {
3891         case Iop_MAddF64:    fpop = Pfp_MADDD; break;
3892         case Iop_MAddF64r32: fpop = Pfp_MADDS; break;
3893         case Iop_MSubF64:    fpop = Pfp_MSUBD; break;
3894         case Iop_MSubF64r32: fpop = Pfp_MSUBS; break;
3895         default: break;
3896      }
3897      if (fpop != Pfp_INVALID) {
3898         HReg r_dst  = newVRegF(env);
3899         HReg r_srcML  = iselDblExpr(env, e->Iex.Qop.details->arg2);
3900         HReg r_srcMR  = iselDblExpr(env, e->Iex.Qop.details->arg3);
3901         HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4);
3902         set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1 );
3903         addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst,
3904                                               r_srcML, r_srcMR, r_srcAcc));
3905         return r_dst;
3906      }
3907   }
3908
3909   if (e->tag == Iex_Triop) {
3910      IRTriop *triop = e->Iex.Triop.details;
3911      PPCFpOp fpop = Pfp_INVALID;
3912      switch (triop->op) {
3913         case Iop_AddF64:    fpop = Pfp_ADDD; break;
3914         case Iop_SubF64:    fpop = Pfp_SUBD; break;
3915         case Iop_MulF64:    fpop = Pfp_MULD; break;
3916         case Iop_DivF64:    fpop = Pfp_DIVD; break;
3917         case Iop_AddF64r32: fpop = Pfp_ADDS; break;
3918         case Iop_SubF64r32: fpop = Pfp_SUBS; break;
3919         case Iop_MulF64r32: fpop = Pfp_MULS; break;
3920         case Iop_DivF64r32: fpop = Pfp_DIVS; break;
3921         default: break;
3922      }
3923      if (fpop != Pfp_INVALID) {
3924         HReg r_dst  = newVRegF(env);
3925         HReg r_srcL = iselDblExpr(env, triop->arg2);
3926         HReg r_srcR = iselDblExpr(env, triop->arg3);
3927         set_FPU_rounding_mode( env, triop->arg1 );
3928         addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
3929         return r_dst;
3930      }
3931   }
3932
3933   if (e->tag == Iex_Binop) {
3934      PPCFpOp fpop = Pfp_INVALID;
3935      switch (e->Iex.Binop.op) {
3936      case Iop_SqrtF64:   fpop = Pfp_SQRT;   break;
3937      default: break;
3938      }
3939      if (fpop == Pfp_SQRT) {
3940         HReg fr_dst = newVRegF(env);
3941         HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2);
3942         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3943         addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
3944         return fr_dst;
3945      }
3946   }
3947
3948   if (e->tag == Iex_Binop) {
3949
3950      if (e->Iex.Binop.op == Iop_RoundF64toF32) {
3951         HReg r_dst = newVRegF(env);
3952         HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2);
3953         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3954         addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
3955         //set_FPU_rounding_default( env );
3956         return r_dst;
3957      }
3958
3959      if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
3960         if (mode64) {
3961            HReg fdst = newVRegF(env);
3962            HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2);
3963            HReg r1   = StackFramePtr(env->mode64);
3964            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3965
3966            /* Set host rounding mode */
3967            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3968
3969            sub_from_sp( env, 16 );
3970
3971            addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
3972            addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
3973            addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
3974                                          e->Iex.Binop.op == Iop_I64StoF64,
3975                                          True/*fdst is 64 bit*/,
3976                                          fdst, fdst));
3977
3978            add_to_sp( env, 16 );
3979
3980            ///* Restore default FPU rounding. */
3981            //set_FPU_rounding_default( env );
3982            return fdst;
3983         } else {
3984            /* 32-bit mode */
3985            HReg fdst = newVRegF(env);
3986            HReg isrcHi, isrcLo;
3987            HReg r1   = StackFramePtr(env->mode64);
3988            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3989            PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3990
3991            iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2);
3992
3993            /* Set host rounding mode */
3994            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3995
3996            sub_from_sp( env, 16 );
3997
3998            addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
3999            addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4000            addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4001            addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4002                                          e->Iex.Binop.op == Iop_I64StoF64,
4003                                          True/*fdst is 64 bit*/,
4004                                          fdst, fdst));
4005
4006            add_to_sp( env, 16 );
4007
4008            ///* Restore default FPU rounding. */
4009            //set_FPU_rounding_default( env );
4010            return fdst;
4011         }
4012      }
4013
4014   }
4015
4016   if (e->tag == Iex_Unop) {
4017      PPCFpOp fpop = Pfp_INVALID;
4018      switch (e->Iex.Unop.op) {
4019         case Iop_NegF64:     fpop = Pfp_NEG; break;
4020         case Iop_AbsF64:     fpop = Pfp_ABS; break;
4021         case Iop_Est5FRSqrt: fpop = Pfp_RSQRTE; break;
4022         case Iop_RoundF64toF64_NegINF:  fpop = Pfp_FRIM; break;
4023         case Iop_RoundF64toF64_PosINF:  fpop = Pfp_FRIP; break;
4024         case Iop_RoundF64toF64_NEAREST: fpop = Pfp_FRIN; break;
4025         case Iop_RoundF64toF64_ZERO:    fpop = Pfp_FRIZ; break;
4026         default: break;
4027      }
4028      if (fpop != Pfp_INVALID) {
4029         HReg fr_dst = newVRegF(env);
4030         HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg);
4031         addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4032         return fr_dst;
4033      }
4034   }
4035
4036   if (e->tag == Iex_Unop) {
4037      switch (e->Iex.Unop.op) {
4038         case Iop_ReinterpI64asF64: {
4039            /* Given an I64, produce an IEEE754 double with the same
4040               bit pattern. */
4041            if (!mode64) {
4042               HReg r_srcHi, r_srcLo;
4043               iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
4044               return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4045            } else {
4046               HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
4047               return mk_LoadR64toFPR( env, r_src );
4048            }
4049         }
4050
4051         case Iop_F32toF64: {
4052            if (e->Iex.Unop.arg->tag == Iex_Unop &&
4053                     e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32 ) {
4054               e = e->Iex.Unop.arg;
4055
4056               HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
4057               HReg fr_dst = newVRegF(env);
4058               PPCAMode *am_addr;
4059
4060               sub_from_sp( env, 16 );        // Move SP down 16 bytes
4061               am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4062
4063               // store src as Ity_I32's
4064               addInstr(env, PPCInstr_Store( 4, am_addr, src, env->mode64 ));
4065
4066               // load single precision float, but the end results loads into a
4067               // 64-bit FP register -- i.e., F64.
4068               addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, fr_dst, am_addr));
4069
4070               add_to_sp( env, 16 );          // Reset SP
4071               return fr_dst;
4072            }
4073
4074
4075            /* this is a no-op */
4076            HReg res = iselFltExpr(env, e->Iex.Unop.arg);
4077            return res;
4078         }
4079         default:
4080            break;
4081      }
4082   }
4083
4084   /* --------- MULTIPLEX --------- */
4085   if (e->tag == Iex_ITE) { // VFD
4086      if (ty == Ity_F64
4087          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
4088         HReg fr1    = iselDblExpr(env, e->Iex.ITE.iftrue);
4089         HReg fr0    = iselDblExpr(env, e->Iex.ITE.iffalse);
4090         HReg fr_dst = newVRegF(env);
4091         addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, fr0 ));
4092         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
4093         addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr1 ));
4094         return fr_dst;
4095      }
4096   }
4097
4098   vex_printf("iselDblExpr(ppc): No such tag(%u)\n", e->tag);
4099   ppIRExpr(e);
4100   vpanic("iselDblExpr_wrk(ppc)");
4101}
4102
4103static HReg iselDfp32Expr(ISelEnv* env, IRExpr* e)
4104{
4105   HReg r = iselDfp32Expr_wrk( env, e );
4106   vassert(hregClass(r) == HRcFlt64);
4107   vassert( hregIsVirtual(r) );
4108   return r;
4109}
4110
4111/* DO NOT CALL THIS DIRECTLY */
4112static HReg iselDfp32Expr_wrk(ISelEnv* env, IRExpr* e)
4113{
4114   Bool mode64 = env->mode64;
4115   IRType ty = typeOfIRExpr( env->type_env, e );
4116
4117   vassert( e );
4118   vassert( ty == Ity_D32 );
4119
4120   /* --------- GET --------- */
4121   if (e->tag == Iex_Get) {
4122      HReg r_dst = newVRegF( env );
4123      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4124                                       GuestStatePtr(mode64) );
4125      addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4126      return r_dst;
4127   }
4128
4129   /* --------- LOAD --------- */
4130   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
4131      PPCAMode* am_addr;
4132      HReg r_dst = newVRegF(env);
4133      vassert(e->Iex.Load.ty == Ity_D32);
4134      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/);
4135      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4136      return r_dst;
4137   }
4138
4139   /* --------- OPS --------- */
4140   if (e->tag == Iex_Binop) {
4141      if (e->Iex.Binop.op == Iop_D64toD32) {
4142         HReg fr_dst = newVRegF(env);
4143         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
4144         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
4145         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DRSP, fr_dst, fr_src));
4146         return fr_dst;
4147      }
4148   }
4149
4150   ppIRExpr( e );
4151   vpanic( "iselDfp32Expr_wrk(ppc)" );
4152}
4153
4154static HReg iselDfp64Expr(ISelEnv* env, IRExpr* e)
4155{
4156   HReg r = iselDfp64Expr_wrk( env, e );
4157   vassert(hregClass(r) == HRcFlt64);
4158   vassert( hregIsVirtual(r) );
4159   return r;
4160}
4161
4162/* DO NOT CALL THIS DIRECTLY */
4163static HReg iselDfp64Expr_wrk(ISelEnv* env, IRExpr* e)
4164{
4165   Bool mode64 = env->mode64;
4166   IRType ty = typeOfIRExpr( env->type_env, e );
4167   HReg r_dstHi, r_dstLo;
4168
4169   vassert( e );
4170   vassert( ty == Ity_D64 );
4171
4172   if (e->tag == Iex_RdTmp) {
4173      return lookupIRTemp( env, e->Iex.RdTmp.tmp );
4174   }
4175
4176   /* --------- GET --------- */
4177   if (e->tag == Iex_Get) {
4178      HReg r_dst = newVRegF( env );
4179      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4180                                       GuestStatePtr(mode64) );
4181      addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4182      return r_dst;
4183   }
4184
4185   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
4186      PPCAMode* am_addr;
4187      HReg r_dst = newVRegF(env);
4188      vassert(e->Iex.Load.ty == Ity_D64);
4189      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/);
4190      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
4191      return r_dst;
4192   }
4193
4194   /* --------- OPS --------- */
4195   if (e->tag == Iex_Qop) {
4196      HReg r_dst = newVRegF( env );
4197      return r_dst;
4198   }
4199
4200   if (e->tag == Iex_Unop) {
4201      HReg fr_dst = newVRegF(env);
4202      switch (e->Iex.Unop.op) {
4203      case Iop_ReinterpI64asD64: {
4204         /* Given an I64, produce an IEEE754 DFP with the same
4205               bit pattern. */
4206         if (!mode64) {
4207            HReg r_srcHi, r_srcLo;
4208            iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
4209            return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4210         } else {
4211            HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
4212            return mk_LoadR64toFPR( env, r_src );
4213         }
4214      }
4215      case Iop_D32toD64: {
4216         HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg);
4217         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTDP, fr_dst, fr_src));
4218         return fr_dst;
4219      }
4220      case Iop_D128HItoD64:
4221         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg );
4222         return r_dstHi;
4223      case Iop_D128LOtoD64:
4224         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg );
4225         return r_dstLo;
4226      case Iop_InsertExpD64: {
4227         HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1);
4228         HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2);
4229
4230         addInstr(env, PPCInstr_Dfp64Binary(Pfp_DIEX, fr_dst, fr_srcL,
4231					    fr_srcR));
4232         return fr_dst;
4233       }
4234      default:
4235         vex_printf( "ERROR: iselDfp64Expr_wrk, UNKNOWN unop case %d\n",
4236                     e->Iex.Unop.op );
4237      }
4238   }
4239
4240   if (e->tag == Iex_Binop) {
4241      PPCFpOp fpop = Pfp_INVALID;
4242      HReg fr_dst = newVRegF(env);
4243
4244      switch (e->Iex.Binop.op) {
4245      case Iop_D128toD64:     fpop = Pfp_DRDPQ;  break;
4246      case Iop_D64toD32:      fpop = Pfp_DRSP;   break;
4247      case Iop_I64StoD64:     fpop = Pfp_DCFFIX; break;
4248      case Iop_RoundD64toInt: fpop = Pfp_DRINTN; break;
4249      default: break;
4250      }
4251      if (fpop == Pfp_DRDPQ) {
4252         HReg r_srcHi = newVRegF(env);
4253         HReg r_srcLo = newVRegF(env);
4254
4255         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
4256         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
4257         addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
4258         return fr_dst;
4259
4260      } else if (fpop == Pfp_DRINTN) {
4261         HReg fr_src = newVRegF(env);
4262         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1);
4263
4264         /* NOTE, this IOP takes a DFP value and rounds to the
4265          * neares floating point integer value, i.e. fractional part
4266          * is zero.  The result is a decimal floating point number.
4267          * the INT in the name is a bit misleading.
4268          */
4269         fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
4270         addInstr(env, PPCInstr_DfpRound(fr_dst, fr_src, r_rmc));
4271         return fr_dst;
4272
4273      } else if (fpop == Pfp_DRSP) {
4274         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
4275         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
4276         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
4277         return fr_dst;
4278
4279      } else if (fpop == Pfp_DCFFIX) {
4280         HReg fr_src = newVRegF(env);
4281         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4282
4283         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
4284         sub_from_sp( env, 16 );
4285
4286         // put the I64 value into a floating point register
4287         if (mode64) {
4288            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2);
4289
4290           addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
4291         } else {
4292            HReg tmpHi, tmpLo;
4293            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
4294
4295            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2);
4296            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
4297            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
4298         }
4299
4300         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8,  fr_src, zero_r1));
4301         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
4302         add_to_sp( env, 16 );
4303         return fr_dst;
4304      }
4305
4306      switch (e->Iex.Binop.op) {
4307      /* shift instructions D64, I32 -> D64 */
4308      case Iop_ShlD64: fpop = Pfp_DSCLI; break;
4309      case Iop_ShrD64: fpop = Pfp_DSCRI; break;
4310      default: break;
4311      }
4312      if (fpop != Pfp_INVALID) {
4313         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1);
4314         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2);
4315
4316         /* shift value must be an immediate value */
4317         vassert(shift->tag == Pri_Imm);
4318
4319         addInstr(env, PPCInstr_DfpShift(fpop, fr_dst, fr_src, shift));
4320         return fr_dst;
4321      }
4322
4323      switch (e->Iex.Binop.op) {
4324      case Iop_InsertExpD64:
4325         fpop = Pfp_DIEX;
4326         break;
4327      default: 	break;
4328      }
4329      if (fpop != Pfp_INVALID) {
4330         HReg fr_srcL = newVRegF(env);
4331         HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2);
4332         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4333         sub_from_sp( env, 16 );
4334
4335         if (env->mode64) {
4336            // put the I64 value into a floating point reg
4337            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1);
4338
4339            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
4340         } else {
4341            // put the I64 register pair into a floating point reg
4342            HReg tmpHi;
4343            HReg tmpLo;
4344            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
4345
4346            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1);
4347            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*!mode64*/));
4348            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*!mode64*/));
4349         }
4350         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_srcL, zero_r1));
4351         addInstr(env, PPCInstr_Dfp64Binary(fpop, fr_dst, fr_srcL,
4352                                            fr_srcR));
4353         add_to_sp( env, 16 );
4354         return fr_dst;
4355      }
4356   }
4357
4358   if (e->tag == Iex_Triop) {
4359      IRTriop *triop = e->Iex.Triop.details;
4360      PPCFpOp fpop = Pfp_INVALID;
4361
4362      switch (triop->op) {
4363      case Iop_AddD64:
4364         fpop = Pfp_DFPADD;
4365         break;
4366      case Iop_SubD64:
4367         fpop = Pfp_DFPSUB;
4368         break;
4369      case Iop_MulD64:
4370         fpop = Pfp_DFPMUL;
4371         break;
4372      case Iop_DivD64:
4373         fpop = Pfp_DFPDIV;
4374         break;
4375      default:
4376         break;
4377      }
4378      if (fpop != Pfp_INVALID) {
4379         HReg r_dst = newVRegF( env );
4380         HReg r_srcL = iselDfp64Expr( env, triop->arg2 );
4381         HReg r_srcR = iselDfp64Expr( env, triop->arg3 );
4382
4383         set_FPU_DFP_rounding_mode( env, triop->arg1 );
4384         addInstr( env, PPCInstr_Dfp64Binary( fpop, r_dst, r_srcL, r_srcR ) );
4385         return r_dst;
4386      }
4387
4388      switch (triop->op) {
4389      case Iop_QuantizeD64:          fpop = Pfp_DQUA;  break;
4390      case Iop_SignificanceRoundD64: fpop = Pfp_RRDTR; break;
4391      default: break;
4392      }
4393      if (fpop == Pfp_DQUA) {
4394         HReg r_dst = newVRegF(env);
4395         HReg r_srcL = iselDfp64Expr(env, triop->arg2);
4396         HReg r_srcR = iselDfp64Expr(env, triop->arg3);
4397         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1);
4398         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR,
4399                                            rmc));
4400         return r_dst;
4401
4402      } else if (fpop == Pfp_RRDTR) {
4403         HReg r_dst = newVRegF(env);
4404         HReg r_srcL = newVRegF(env);
4405         HReg r_srcR = iselDfp64Expr(env, triop->arg3);
4406         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1);
4407         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4408         HReg i8_val = iselWordExpr_R(env, triop->arg2);
4409
4410         /* Move I8 to float register to issue instruction */
4411         sub_from_sp( env, 16 );
4412         if (mode64)
4413            addInstr(env, PPCInstr_Store(8, zero_r1, i8_val, True/*mode64*/));
4414         else
4415            addInstr(env, PPCInstr_Store(4, zero_r1, i8_val, False/*mode32*/));
4416
4417         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
4418         add_to_sp( env, 16 );
4419
4420         // will set TE and RMC when issuing instruction
4421         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR, rmc));
4422         return r_dst;
4423      }
4424   }
4425
4426   ppIRExpr( e );
4427   vpanic( "iselDfp64Expr_wrk(ppc)" );
4428}
4429
4430static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e)
4431{
4432   iselDfp128Expr_wrk( rHi, rLo, env, e );
4433   vassert( hregIsVirtual(*rHi) );
4434   vassert( hregIsVirtual(*rLo) );
4435}
4436
4437/* DO NOT CALL THIS DIRECTLY */
4438static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env, IRExpr* e)
4439{
4440   vassert( e );
4441   vassert( typeOfIRExpr(env->type_env,e) == Ity_D128 );
4442
4443   /* read 128-bit IRTemp */
4444   if (e->tag == Iex_RdTmp) {
4445      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp );
4446      return;
4447   }
4448
4449   if (e->tag == Iex_Unop) {
4450      HReg r_dstHi = newVRegF(env);
4451      HReg r_dstLo = newVRegF(env);
4452
4453      if (e->Iex.Unop.op == Iop_I64StoD128) {
4454         HReg fr_src = newVRegF(env);
4455         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4456
4457         // put the I64 value into a floating point reg
4458         if (env->mode64) {
4459            HReg tmp   = iselWordExpr_R(env, e->Iex.Unop.arg);
4460            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
4461         } else {
4462            HReg tmpHi, tmpLo;
4463            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
4464
4465            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg);
4466            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
4467            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
4468         }
4469
4470         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
4471         addInstr(env, PPCInstr_DfpI64StoD128(Pfp_DCFFIXQ, r_dstHi, r_dstLo,
4472                                              fr_src));
4473      }
4474
4475      if (e->Iex.Unop.op == Iop_D64toD128) {
4476         HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg);
4477
4478         /* Source is 64bit, result is 128 bit.  High 64bit source arg,
4479          * is ignored by the instruction.  Set high arg to r_src just
4480          * to meet the vassert tests.
4481          */
4482         addInstr(env, PPCInstr_Dfp128Unary(Pfp_DCTQPQ, r_dstHi, r_dstLo,
4483                                            r_src, r_src));
4484      }
4485      *rHi = r_dstHi;
4486      *rLo = r_dstLo;
4487      return;
4488   }
4489
4490   /* --------- OPS --------- */
4491   if (e->tag == Iex_Binop) {
4492      HReg r_srcHi;
4493      HReg r_srcLo;
4494
4495      switch (e->Iex.Binop.op) {
4496      case Iop_D64HLtoD128:
4497         r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1 );
4498         r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2 );
4499         *rHi = r_srcHi;
4500         *rLo = r_srcLo;
4501         return;
4502         break;
4503      case Iop_D128toD64: {
4504         PPCFpOp fpop = Pfp_DRDPQ;
4505         HReg fr_dst  = newVRegF(env);
4506
4507         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
4508         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
4509         addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
4510
4511         /* Need to meet the interface spec but the result is
4512          * just 64-bits so send the result back in both halfs.
4513          */
4514         *rHi = fr_dst;
4515         *rLo = fr_dst;
4516         return;
4517      }
4518      case Iop_ShlD128:
4519      case Iop_ShrD128: {
4520         HReg fr_dst_hi = newVRegF(env);
4521         HReg fr_dst_lo = newVRegF(env);
4522         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2);
4523         PPCFpOp fpop = Pfp_DSCLIQ;  /* fix later if necessary */
4524
4525         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1);
4526
4527         if (e->Iex.Binop.op == Iop_ShrD128)
4528            fpop = Pfp_DSCRIQ;
4529
4530         addInstr(env, PPCInstr_DfpShift128(fpop, fr_dst_hi, fr_dst_lo,
4531                                            r_srcHi, r_srcLo, shift));
4532
4533         *rHi = fr_dst_hi;
4534         *rLo = fr_dst_lo;
4535         return;
4536      }
4537      case Iop_RoundD128toInt: {
4538         HReg r_dstHi = newVRegF(env);
4539         HReg r_dstLo = newVRegF(env);
4540         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1);
4541
4542         // will set R and RMC when issuing instruction
4543         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
4544
4545         addInstr(env, PPCInstr_DfpRound128(r_dstHi, r_dstLo,
4546                                            r_srcHi, r_srcLo, r_rmc));
4547         *rHi = r_dstHi;
4548         *rLo = r_dstLo;
4549         return;
4550      }
4551      case Iop_InsertExpD128: {
4552         HReg r_dstHi = newVRegF(env);
4553         HReg r_dstLo = newVRegF(env);
4554         HReg r_srcL  = newVRegF(env);
4555         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4556         r_srcHi = newVRegF(env);
4557         r_srcLo = newVRegF(env);
4558
4559         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
4560
4561         /* Move I64 to float register to issue instruction */
4562         if (env->mode64) {
4563            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1);
4564            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
4565         } else {
4566            HReg tmpHi, tmpLo;
4567            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
4568
4569            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg);
4570            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
4571            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
4572         }
4573
4574         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
4575         addInstr(env, PPCInstr_InsertExpD128(Pfp_DIEXQ,
4576                                              r_dstHi, r_dstLo,
4577                                              r_srcL, r_srcHi, r_srcLo));
4578         *rHi = r_dstHi;
4579         *rLo = r_dstLo;
4580         return;
4581      }
4582      default:
4583         vex_printf( "ERROR: iselDfp128Expr_wrk, UNKNOWN binop case %d\n",
4584                     e->Iex.Binop.op );
4585         break;
4586      }
4587   }
4588
4589   if (e->tag == Iex_Triop) {
4590      IRTriop *triop = e->Iex.Triop.details;
4591      PPCFpOp fpop = Pfp_INVALID;
4592      HReg r_dstHi = newVRegF(env);
4593      HReg r_dstLo = newVRegF(env);
4594
4595      switch (triop->op) {
4596      case Iop_AddD128:
4597         fpop = Pfp_DFPADDQ;
4598         break;
4599      case Iop_SubD128:
4600         fpop = Pfp_DFPSUBQ;
4601         break;
4602      case Iop_MulD128:
4603         fpop = Pfp_DFPMULQ;
4604         break;
4605      case Iop_DivD128:
4606         fpop = Pfp_DFPDIVQ;
4607         break;
4608      default:
4609         break;
4610      }
4611
4612      if (fpop != Pfp_INVALID) {
4613         HReg r_srcRHi = newVRegV( env );
4614         HReg r_srcRLo = newVRegV( env );
4615
4616         /* dst will be used to pass in the left operand and get the result. */
4617         iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2 );
4618         iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3 );
4619         set_FPU_DFP_rounding_mode( env, triop->arg1 );
4620         addInstr( env,
4621                   PPCInstr_Dfp128Binary( fpop, r_dstHi, r_dstLo,
4622                                          r_srcRHi, r_srcRLo ) );
4623         *rHi = r_dstHi;
4624         *rLo = r_dstLo;
4625         return;
4626      }
4627      switch (triop->op) {
4628      case Iop_QuantizeD128:          fpop = Pfp_DQUAQ;  break;
4629      case Iop_SignificanceRoundD128: fpop = Pfp_DRRNDQ; break;
4630      default: break;
4631      }
4632      if (fpop == Pfp_DQUAQ) {
4633         HReg r_srcHi = newVRegF(env);
4634         HReg r_srcLo = newVRegF(env);
4635         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1);
4636
4637         /* dst will be used to pass in the left operand and get the result */
4638         iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2);
4639         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3);
4640
4641         // will set RMC when issuing instruction
4642         addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
4643                                               r_srcHi, r_srcLo, rmc));
4644        *rHi = r_dstHi;
4645        *rLo = r_dstLo;
4646         return;
4647
4648      } else if (fpop == Pfp_DRRNDQ) {
4649         HReg r_srcHi = newVRegF(env);
4650         HReg r_srcLo = newVRegF(env);
4651         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1);
4652         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4653         PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
4654         HReg i8_val = iselWordExpr_R(env, triop->arg2);
4655         HReg r_zero = newVRegI( env );
4656
4657         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3);
4658
4659         /* dst will be used to pass in the left operand and get the result */
4660         /* Move I8 to float register to issue instruction.  Note, the
4661          * instruction only looks at the bottom 6 bits so we really don't
4662          * have to clear the upper bits since the iselWordExpr_R sets the
4663          * bottom 8-bits.
4664          */
4665         sub_from_sp( env, 16 );
4666
4667         if (env->mode64)
4668            addInstr(env, PPCInstr_Store(4, four_r1, i8_val, True/*mode64*/));
4669         else
4670            addInstr(env, PPCInstr_Store(4, four_r1, i8_val, False/*mode32*/));
4671
4672         /* Have to write to the upper bits to ensure they have been
4673          * initialized. The instruction ignores all but the lower 6-bits.
4674          */
4675         addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
4676         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstHi, zero_r1));
4677         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstLo, zero_r1));
4678
4679         add_to_sp( env, 16 );
4680
4681         // will set RMC when issuing instruction
4682         addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
4683                                               r_srcHi, r_srcLo, rmc));
4684         *rHi = r_dstHi;
4685         *rLo = r_dstLo;
4686         return;
4687      }
4688 }
4689
4690   ppIRExpr( e );
4691   vpanic( "iselDfp128Expr(ppc64)" );
4692}
4693
4694
4695/*---------------------------------------------------------*/
4696/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
4697/*---------------------------------------------------------*/
4698
4699static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
4700{
4701   HReg r = iselVecExpr_wrk( env, e );
4702#  if 0
4703   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4704#  endif
4705   vassert(hregClass(r) == HRcVec128);
4706   vassert(hregIsVirtual(r));
4707   return r;
4708}
4709
4710/* DO NOT CALL THIS DIRECTLY */
4711static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
4712{
4713   Bool mode64 = env->mode64;
4714   PPCAvOp op = Pav_INVALID;
4715   PPCAvFpOp fpop = Pavfp_INVALID;
4716   IRType  ty = typeOfIRExpr(env->type_env,e);
4717   vassert(e);
4718   vassert(ty == Ity_V128);
4719
4720   if (e->tag == Iex_RdTmp) {
4721      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4722   }
4723
4724   if (e->tag == Iex_Get) {
4725      /* Guest state vectors are 16byte aligned,
4726         so don't need to worry here */
4727      HReg dst = newVRegV(env);
4728      addInstr(env,
4729               PPCInstr_AvLdSt( True/*load*/, 16, dst,
4730                                PPCAMode_IR( e->Iex.Get.offset,
4731                                             GuestStatePtr(mode64) )));
4732      return dst;
4733   }
4734
4735   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
4736      PPCAMode* am_addr;
4737      HReg v_dst = newVRegV(env);
4738      vassert(e->Iex.Load.ty == Ity_V128);
4739      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_V128/*xfer*/);
4740      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, v_dst, am_addr));
4741      return v_dst;
4742   }
4743
4744   if (e->tag == Iex_Unop) {
4745      switch (e->Iex.Unop.op) {
4746
4747      case Iop_NotV128: {
4748         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
4749         HReg dst = newVRegV(env);
4750         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
4751         return dst;
4752      }
4753
4754      case Iop_CmpNEZ8x16: {
4755         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
4756         HReg zero = newVRegV(env);
4757         HReg dst  = newVRegV(env);
4758         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
4759         addInstr(env, PPCInstr_AvBin8x16(Pav_CMPEQU, dst, arg, zero));
4760         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
4761         return dst;
4762      }
4763
4764      case Iop_CmpNEZ16x8: {
4765         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
4766         HReg zero = newVRegV(env);
4767         HReg dst  = newVRegV(env);
4768         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
4769         addInstr(env, PPCInstr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
4770         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
4771         return dst;
4772      }
4773
4774      case Iop_CmpNEZ32x4: {
4775         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
4776         HReg zero = newVRegV(env);
4777         HReg dst  = newVRegV(env);
4778         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
4779         addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
4780         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
4781         return dst;
4782      }
4783
4784      case Iop_CmpNEZ64x2: {
4785         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
4786         HReg zero = newVRegV(env);
4787         HReg dst  = newVRegV(env);
4788         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
4789         addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero));
4790         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
4791         return dst;
4792      }
4793
4794      case Iop_Recip32Fx4:    fpop = Pavfp_RCPF;    goto do_32Fx4_unary;
4795      case Iop_RSqrt32Fx4:    fpop = Pavfp_RSQRTF;  goto do_32Fx4_unary;
4796      case Iop_I32UtoFx4:     fpop = Pavfp_CVTU2F;  goto do_32Fx4_unary;
4797      case Iop_I32StoFx4:     fpop = Pavfp_CVTS2F;  goto do_32Fx4_unary;
4798      case Iop_QFtoI32Ux4_RZ: fpop = Pavfp_QCVTF2U; goto do_32Fx4_unary;
4799      case Iop_QFtoI32Sx4_RZ: fpop = Pavfp_QCVTF2S; goto do_32Fx4_unary;
4800      case Iop_RoundF32x4_RM: fpop = Pavfp_ROUNDM;  goto do_32Fx4_unary;
4801      case Iop_RoundF32x4_RP: fpop = Pavfp_ROUNDP;  goto do_32Fx4_unary;
4802      case Iop_RoundF32x4_RN: fpop = Pavfp_ROUNDN;  goto do_32Fx4_unary;
4803      case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ;  goto do_32Fx4_unary;
4804      do_32Fx4_unary:
4805      {
4806         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
4807         HReg dst = newVRegV(env);
4808         addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
4809         return dst;
4810      }
4811
4812      case Iop_32UtoV128: {
4813         HReg r_aligned16, r_zeros;
4814         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
4815         HReg   dst = newVRegV(env);
4816         PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
4817         sub_from_sp( env, 32 );     // Move SP down
4818
4819         /* Get a quadword aligned address within our stack space */
4820         r_aligned16 = get_sp_aligned16( env );
4821         am_off0  = PPCAMode_IR( 0,  r_aligned16 );
4822         am_off4  = PPCAMode_IR( 4,  r_aligned16 );
4823         am_off8  = PPCAMode_IR( 8,  r_aligned16 );
4824         am_off12 = PPCAMode_IR( 12, r_aligned16 );
4825
4826         /* Store zeros */
4827         r_zeros = newVRegI(env);
4828         addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
4829         addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
4830         addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
4831         addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
4832
4833         /* Store r_src in low word of quadword-aligned mem */
4834         addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
4835
4836         /* Load word into low word of quadword vector reg */
4837         addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
4838
4839         add_to_sp( env, 32 );       // Reset SP
4840         return dst;
4841      }
4842
4843      case Iop_Dup8x16:
4844      case Iop_Dup16x8:
4845      case Iop_Dup32x4:
4846         return mk_AvDuplicateRI(env, e->Iex.Unop.arg);
4847
4848      case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
4849      do_AvCipherV128Un: {
4850         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
4851         HReg dst = newVRegV(env);
4852         addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
4853         return dst;
4854      }
4855
4856      case Iop_Clz8Sx16: fpop = Pav_ZEROCNTBYTE;   goto do_zerocnt;
4857      case Iop_Clz16Sx8: fpop = Pav_ZEROCNTHALF;   goto do_zerocnt;
4858      case Iop_Clz32Sx4: fpop = Pav_ZEROCNTWORD;   goto do_zerocnt;
4859      case Iop_Clz64x2:  fpop = Pav_ZEROCNTDBL;    goto do_zerocnt;
4860      case Iop_PwBitMtxXpose64x2: fpop = Pav_BITMTXXPOSE;  goto do_zerocnt;
4861      do_zerocnt:
4862      {
4863        HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
4864        HReg dst = newVRegV(env);
4865        addInstr(env, PPCInstr_AvUnary(fpop, dst, arg));
4866        return dst;
4867      }
4868
4869      default:
4870         break;
4871      } /* switch (e->Iex.Unop.op) */
4872   } /* if (e->tag == Iex_Unop) */
4873
4874   if (e->tag == Iex_Binop) {
4875      switch (e->Iex.Binop.op) {
4876
4877      case Iop_64HLtoV128: {
4878         if (!mode64) {
4879            HReg     r3, r2, r1, r0, r_aligned16;
4880            PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
4881            HReg     dst = newVRegV(env);
4882            /* do this via the stack (easy, convenient, etc) */
4883            sub_from_sp( env, 32 );        // Move SP down
4884
4885            // get a quadword aligned address within our stack space
4886            r_aligned16 = get_sp_aligned16( env );
4887            am_off0  = PPCAMode_IR( 0,  r_aligned16 );
4888            am_off4  = PPCAMode_IR( 4,  r_aligned16 );
4889            am_off8  = PPCAMode_IR( 8,  r_aligned16 );
4890            am_off12 = PPCAMode_IR( 12, r_aligned16 );
4891
4892            /* Do the less significant 64 bits */
4893            iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
4894            addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
4895            addInstr(env, PPCInstr_Store( 4, am_off8,  r1, mode64 ));
4896            /* Do the more significant 64 bits */
4897            iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
4898            addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
4899            addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
4900
4901            /* Fetch result back from stack. */
4902            addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
4903
4904            add_to_sp( env, 32 );          // Reset SP
4905            return dst;
4906         } else {
4907            HReg     rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
4908            HReg     rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
4909            HReg     dst = newVRegV(env);
4910            HReg     r_aligned16;
4911            PPCAMode *am_off0, *am_off8;
4912            /* do this via the stack (easy, convenient, etc) */
4913            sub_from_sp( env, 32 );        // Move SP down
4914
4915            // get a quadword aligned address within our stack space
4916            r_aligned16 = get_sp_aligned16( env );
4917            am_off0  = PPCAMode_IR( 0,  r_aligned16 );
4918            am_off8  = PPCAMode_IR( 8,  r_aligned16 );
4919
4920            /* Store 2*I64 to stack */
4921            addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
4922            addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
4923
4924            /* Fetch result back from stack. */
4925            addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
4926
4927            add_to_sp( env, 32 );          // Reset SP
4928            return dst;
4929         }
4930      }
4931
4932      case Iop_Max32Fx4:   fpop = Pavfp_MAXF;   goto do_32Fx4;
4933      case Iop_Min32Fx4:   fpop = Pavfp_MINF;   goto do_32Fx4;
4934      case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
4935      case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
4936      case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
4937      do_32Fx4:
4938      {
4939         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
4940         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
4941         HReg dst = newVRegV(env);
4942         addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
4943         return dst;
4944      }
4945
4946      case Iop_CmpLE32Fx4: {
4947         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
4948         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
4949         HReg dst = newVRegV(env);
4950
4951         /* stay consistent with native ppc compares:
4952            if a left/right lane holds a nan, return zeros for that lane
4953            so: le == NOT(gt OR isNan)
4954          */
4955         HReg isNanLR = newVRegV(env);
4956         HReg isNanL = isNan(env, argL);
4957         HReg isNanR = isNan(env, argR);
4958         addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
4959                                         isNanL, isNanR));
4960
4961         addInstr(env, PPCInstr_AvBin32Fx4(Pavfp_CMPGTF, dst,
4962                                           argL, argR));
4963         addInstr(env, PPCInstr_AvBinary(Pav_OR, dst, dst, isNanLR));
4964         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
4965         return dst;
4966      }
4967
4968      case Iop_AndV128:    op = Pav_AND;      goto do_AvBin;
4969      case Iop_OrV128:     op = Pav_OR;       goto do_AvBin;
4970      case Iop_XorV128:    op = Pav_XOR;      goto do_AvBin;
4971      do_AvBin: {
4972         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
4973         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
4974         HReg dst  = newVRegV(env);
4975         addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
4976         return dst;
4977      }
4978
4979      case Iop_Shl8x16:    op = Pav_SHL;    goto do_AvBin8x16;
4980      case Iop_Shr8x16:    op = Pav_SHR;    goto do_AvBin8x16;
4981      case Iop_Sar8x16:    op = Pav_SAR;    goto do_AvBin8x16;
4982      case Iop_Rol8x16:    op = Pav_ROTL;   goto do_AvBin8x16;
4983      case Iop_InterleaveHI8x16: op = Pav_MRGHI;  goto do_AvBin8x16;
4984      case Iop_InterleaveLO8x16: op = Pav_MRGLO;  goto do_AvBin8x16;
4985      case Iop_Add8x16:    op = Pav_ADDU;   goto do_AvBin8x16;
4986      case Iop_QAdd8Ux16:  op = Pav_QADDU;  goto do_AvBin8x16;
4987      case Iop_QAdd8Sx16:  op = Pav_QADDS;  goto do_AvBin8x16;
4988      case Iop_Sub8x16:    op = Pav_SUBU;   goto do_AvBin8x16;
4989      case Iop_QSub8Ux16:  op = Pav_QSUBU;  goto do_AvBin8x16;
4990      case Iop_QSub8Sx16:  op = Pav_QSUBS;  goto do_AvBin8x16;
4991      case Iop_Avg8Ux16:   op = Pav_AVGU;   goto do_AvBin8x16;
4992      case Iop_Avg8Sx16:   op = Pav_AVGS;   goto do_AvBin8x16;
4993      case Iop_Max8Ux16:   op = Pav_MAXU;   goto do_AvBin8x16;
4994      case Iop_Max8Sx16:   op = Pav_MAXS;   goto do_AvBin8x16;
4995      case Iop_Min8Ux16:   op = Pav_MINU;   goto do_AvBin8x16;
4996      case Iop_Min8Sx16:   op = Pav_MINS;   goto do_AvBin8x16;
4997      case Iop_MullEven8Ux16: op = Pav_OMULU;  goto do_AvBin8x16;
4998      case Iop_MullEven8Sx16: op = Pav_OMULS;  goto do_AvBin8x16;
4999      case Iop_CmpEQ8x16:  op = Pav_CMPEQU; goto do_AvBin8x16;
5000      case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
5001      case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
5002      case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
5003      do_AvBin8x16: {
5004         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
5005         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
5006         HReg dst  = newVRegV(env);
5007         addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
5008         return dst;
5009      }
5010
5011      case Iop_Shl16x8:    op = Pav_SHL;    goto do_AvBin16x8;
5012      case Iop_Shr16x8:    op = Pav_SHR;    goto do_AvBin16x8;
5013      case Iop_Sar16x8:    op = Pav_SAR;    goto do_AvBin16x8;
5014      case Iop_Rol16x8:    op = Pav_ROTL;   goto do_AvBin16x8;
5015      case Iop_NarrowBin16to8x16:    op = Pav_PACKUU;  goto do_AvBin16x8;
5016      case Iop_QNarrowBin16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
5017      case Iop_QNarrowBin16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
5018      case Iop_InterleaveHI16x8:  op = Pav_MRGHI;  goto do_AvBin16x8;
5019      case Iop_InterleaveLO16x8:  op = Pav_MRGLO;  goto do_AvBin16x8;
5020      case Iop_Add16x8:    op = Pav_ADDU;   goto do_AvBin16x8;
5021      case Iop_QAdd16Ux8:  op = Pav_QADDU;  goto do_AvBin16x8;
5022      case Iop_QAdd16Sx8:  op = Pav_QADDS;  goto do_AvBin16x8;
5023      case Iop_Sub16x8:    op = Pav_SUBU;   goto do_AvBin16x8;
5024      case Iop_QSub16Ux8:  op = Pav_QSUBU;  goto do_AvBin16x8;
5025      case Iop_QSub16Sx8:  op = Pav_QSUBS;  goto do_AvBin16x8;
5026      case Iop_Avg16Ux8:   op = Pav_AVGU;   goto do_AvBin16x8;
5027      case Iop_Avg16Sx8:   op = Pav_AVGS;   goto do_AvBin16x8;
5028      case Iop_Max16Ux8:   op = Pav_MAXU;   goto do_AvBin16x8;
5029      case Iop_Max16Sx8:   op = Pav_MAXS;   goto do_AvBin16x8;
5030      case Iop_Min16Ux8:   op = Pav_MINU;   goto do_AvBin16x8;
5031      case Iop_Min16Sx8:   op = Pav_MINS;   goto do_AvBin16x8;
5032      case Iop_MullEven16Ux8: op = Pav_OMULU;  goto do_AvBin16x8;
5033      case Iop_MullEven16Sx8: op = Pav_OMULS;  goto do_AvBin16x8;
5034      case Iop_CmpEQ16x8:  op = Pav_CMPEQU; goto do_AvBin16x8;
5035      case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
5036      case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
5037      case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
5038      do_AvBin16x8: {
5039         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
5040         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
5041         HReg dst  = newVRegV(env);
5042         addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
5043         return dst;
5044      }
5045
5046      case Iop_Shl32x4:    op = Pav_SHL;    goto do_AvBin32x4;
5047      case Iop_Shr32x4:    op = Pav_SHR;    goto do_AvBin32x4;
5048      case Iop_Sar32x4:    op = Pav_SAR;    goto do_AvBin32x4;
5049      case Iop_Rol32x4:    op = Pav_ROTL;   goto do_AvBin32x4;
5050      case Iop_NarrowBin32to16x8:    op = Pav_PACKUU;  goto do_AvBin32x4;
5051      case Iop_QNarrowBin32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
5052      case Iop_QNarrowBin32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
5053      case Iop_InterleaveHI32x4:  op = Pav_MRGHI;  goto do_AvBin32x4;
5054      case Iop_InterleaveLO32x4:  op = Pav_MRGLO;  goto do_AvBin32x4;
5055      case Iop_Add32x4:    op = Pav_ADDU;   goto do_AvBin32x4;
5056      case Iop_QAdd32Ux4:  op = Pav_QADDU;  goto do_AvBin32x4;
5057      case Iop_QAdd32Sx4:  op = Pav_QADDS;  goto do_AvBin32x4;
5058      case Iop_Sub32x4:    op = Pav_SUBU;   goto do_AvBin32x4;
5059      case Iop_QSub32Ux4:  op = Pav_QSUBU;  goto do_AvBin32x4;
5060      case Iop_QSub32Sx4:  op = Pav_QSUBS;  goto do_AvBin32x4;
5061      case Iop_Avg32Ux4:   op = Pav_AVGU;   goto do_AvBin32x4;
5062      case Iop_Avg32Sx4:   op = Pav_AVGS;   goto do_AvBin32x4;
5063      case Iop_Max32Ux4:   op = Pav_MAXU;   goto do_AvBin32x4;
5064      case Iop_Max32Sx4:   op = Pav_MAXS;   goto do_AvBin32x4;
5065      case Iop_Min32Ux4:   op = Pav_MINU;   goto do_AvBin32x4;
5066      case Iop_Min32Sx4:   op = Pav_MINS;   goto do_AvBin32x4;
5067      case Iop_Mul32x4:    op = Pav_MULU;   goto do_AvBin32x4;
5068      case Iop_MullEven32Ux4: op = Pav_OMULU;  goto do_AvBin32x4;
5069      case Iop_MullEven32Sx4: op = Pav_OMULS;  goto do_AvBin32x4;
5070      case Iop_CmpEQ32x4:  op = Pav_CMPEQU; goto do_AvBin32x4;
5071      case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
5072      case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
5073      case Iop_CatOddLanes32x4:  op = Pav_CATODD;  goto do_AvBin32x4;
5074      case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
5075      case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
5076      do_AvBin32x4: {
5077         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
5078         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
5079         HReg dst  = newVRegV(env);
5080         addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
5081         return dst;
5082      }
5083
5084      case Iop_Shl64x2:    op = Pav_SHL;    goto do_AvBin64x2;
5085      case Iop_Shr64x2:    op = Pav_SHR;    goto do_AvBin64x2;
5086      case Iop_Sar64x2:    op = Pav_SAR;    goto do_AvBin64x2;
5087      case Iop_Rol64x2:    op = Pav_ROTL;   goto do_AvBin64x2;
5088      case Iop_NarrowBin64to32x4:    op = Pav_PACKUU;  goto do_AvBin64x2;
5089      case Iop_QNarrowBin64Sto32Sx4: op = Pav_QPACKSS; goto do_AvBin64x2;
5090      case Iop_QNarrowBin64Uto32Ux4: op = Pav_QPACKUU; goto do_AvBin64x2;
5091      case Iop_InterleaveHI64x2:  op = Pav_MRGHI;  goto do_AvBin64x2;
5092      case Iop_InterleaveLO64x2:  op = Pav_MRGLO;  goto do_AvBin64x2;
5093      case Iop_Add64x2:    op = Pav_ADDU;   goto do_AvBin64x2;
5094      case Iop_Sub64x2:    op = Pav_SUBU;   goto do_AvBin64x2;
5095      case Iop_Max64Ux2:   op = Pav_MAXU;   goto do_AvBin64x2;
5096      case Iop_Max64Sx2:   op = Pav_MAXS;   goto do_AvBin64x2;
5097      case Iop_Min64Ux2:   op = Pav_MINU;   goto do_AvBin64x2;
5098      case Iop_Min64Sx2:   op = Pav_MINS;   goto do_AvBin64x2;
5099      case Iop_CmpEQ64x2:  op = Pav_CMPEQU; goto do_AvBin64x2;
5100      case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
5101      case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
5102      case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
5103      do_AvBin64x2: {
5104         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
5105         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
5106         HReg dst  = newVRegV(env);
5107         addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
5108         return dst;
5109      }
5110
5111      case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
5112      case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
5113      do_AvShift8x16: {
5114         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
5115         HReg dst    = newVRegV(env);
5116         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
5117         addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
5118         return dst;
5119      }
5120
5121      case Iop_ShlN16x8: op = Pav_SHL; goto do_AvShift16x8;
5122      case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
5123      case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
5124      do_AvShift16x8: {
5125         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
5126         HReg dst    = newVRegV(env);
5127         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
5128         addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
5129         return dst;
5130      }
5131
5132      case Iop_ShlN32x4: op = Pav_SHL; goto do_AvShift32x4;
5133      case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
5134      case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
5135      do_AvShift32x4: {
5136         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
5137         HReg dst    = newVRegV(env);
5138         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
5139         addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
5140         return dst;
5141      }
5142
5143      case Iop_ShlN64x2: op = Pav_SHL; goto do_AvShift64x2;
5144      case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2;
5145      case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2;
5146      do_AvShift64x2: {
5147         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
5148         HReg dst    = newVRegV(env);
5149         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
5150         addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft));
5151         return dst;
5152      }
5153
5154      case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
5155      case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
5156      do_AvShiftV128: {
5157         HReg dst    = newVRegV(env);
5158         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
5159         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
5160         /* Note: shift value gets masked by 127 */
5161         addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
5162         return dst;
5163      }
5164
5165      case Iop_Perm8x16: {
5166         HReg dst   = newVRegV(env);
5167         HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1);
5168         HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2);
5169         addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
5170         return dst;
5171      }
5172
5173      case Iop_CipherV128:  op = Pav_CIPHERV128;   goto do_AvCipherV128;
5174      case Iop_CipherLV128: op = Pav_CIPHERLV128;  goto do_AvCipherV128;
5175      case Iop_NCipherV128: op = Pav_NCIPHERV128;  goto do_AvCipherV128;
5176      case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
5177      do_AvCipherV128: {
5178         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
5179         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
5180         HReg dst  = newVRegV(env);
5181         addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
5182         return dst;
5183      }
5184
5185      case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
5186      case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
5187      do_AvHashV128: {
5188         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
5189         HReg dst  = newVRegV(env);
5190         PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2);
5191         addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
5192         return dst;
5193      }
5194      default:
5195         break;
5196      } /* switch (e->Iex.Binop.op) */
5197   } /* if (e->tag == Iex_Binop) */
5198
5199   if (e->tag == Iex_Triop) {
5200      IRTriop *triop = e->Iex.Triop.details;
5201      switch (triop->op) {
5202      case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
5203      case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
5204      do_AvBCDV128: {
5205         HReg arg1 = iselVecExpr(env, triop->arg1);
5206         HReg arg2 = iselVecExpr(env, triop->arg2);
5207         HReg dst  = newVRegV(env);
5208         PPCRI* ps = iselWordExpr_RI(env, triop->arg3);
5209         addInstr(env, PPCInstr_AvBCDV128Trinary(op, dst, arg1, arg2, ps));
5210         return dst;
5211      }
5212
5213      case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm;
5214      case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm;
5215      case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
5216      do_32Fx4_with_rm:
5217      {
5218         HReg argL = iselVecExpr(env, triop->arg2);
5219         HReg argR = iselVecExpr(env, triop->arg3);
5220         HReg dst  = newVRegV(env);
5221         /* FIXME: this is bogus, in the sense that Altivec ignores
5222            FPSCR.RM, at least for some FP operations.  So setting the
5223            RM is pointless.  This is only really correct in the case
5224            where the RM is known, at JIT time, to be Irrm_NEAREST,
5225            since -- at least for Altivec FP add/sub/mul -- the
5226            emitted insn is hardwired to round to nearest. */
5227         set_FPU_rounding_mode(env, triop->arg1);
5228         addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
5229         return dst;
5230      }
5231
5232      default:
5233         break;
      } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */
5236
5237
5238   if (e->tag == Iex_Const ) {
5239      vassert(e->Iex.Const.con->tag == Ico_V128);
5240      if (e->Iex.Const.con->Ico.V128 == 0x0000) {
5241         return generate_zeroes_V128(env);
5242      }
5243      else if (e->Iex.Const.con->Ico.V128 == 0xffff) {
5244         return generate_ones_V128(env);
5245      }
5246   }
5247
5248   vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
5249              LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
5250                                 env->hwcaps));
5251   ppIRExpr(e);
5252   vpanic("iselVecExpr_wrk(ppc)");
5253}
5254
5255
5256/*---------------------------------------------------------*/
5257/*--- ISEL: Statements                                  ---*/
5258/*---------------------------------------------------------*/
5259
5260static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5261{
5262   Bool mode64 = env->mode64;
5263   if (vex_traceflags & VEX_TRACE_VCODE) {
5264      vex_printf("\n -- ");
5265      ppIRStmt(stmt);
5266      vex_printf("\n");
5267   }
5268
5269   switch (stmt->tag) {
5270
5271   /* --------- STORE --------- */
5272   case Ist_Store: {
5273      IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5274      IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5275      IREndness end   = stmt->Ist.Store.end;
5276
5277      if (end != Iend_BE)
5278         goto stmt_fail;
5279      if (!mode64 && (tya != Ity_I32))
5280         goto stmt_fail;
5281      if (mode64 && (tya != Ity_I64))
5282         goto stmt_fail;
5283
5284      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
5285          (mode64 && (tyd == Ity_I64))) {
5286         PPCAMode* am_addr
5287            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
5288         HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data);
5289         addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)),
5290                                       am_addr, r_src, mode64 ));
5291         return;
5292      }
5293      if (tyd == Ity_F64) {
5294         PPCAMode* am_addr
5295            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
5296         HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data);
5297         addInstr(env,
5298                  PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
5299         return;
5300      }
5301      if (tyd == Ity_F32) {
5302         PPCAMode* am_addr
5303            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
5304         HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data);
5305         addInstr(env,
5306                  PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
5307         return;
5308      }
5309      if (tyd == Ity_D64) {
5310         PPCAMode* am_addr
5311            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
5312         HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data);
5313         addInstr(env,
5314                  PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
5315         return;
5316      }
5317      if (tyd == Ity_D32) {
5318         PPCAMode* am_addr
5319            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
5320         HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data);
5321         addInstr(env,
5322                  PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
5323         return;
5324      }
5325      if (tyd == Ity_V128) {
5326         PPCAMode* am_addr
5327            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
5328         HReg v_src = iselVecExpr(env, stmt->Ist.Store.data);
5329         addInstr(env,
5330                  PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
5331         return;
5332      }
5333      if (tyd == Ity_I64 && !mode64) {
5334         /* Just calculate the address in the register.  Life is too
5335            short to arse around trying and possibly failing to adjust
5336            the offset in a 'reg+offset' style amode. */
5337         HReg rHi32, rLo32;
5338         HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr);
5339         iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data );
5340         addInstr(env, PPCInstr_Store( 4/*byte-store*/,
5341                                       PPCAMode_IR( 0, r_addr ),
5342                                       rHi32,
5343                                       False/*32-bit insn please*/) );
5344         addInstr(env, PPCInstr_Store( 4/*byte-store*/,
5345                                       PPCAMode_IR( 4, r_addr ),
5346                                       rLo32,
5347                                       False/*32-bit insn please*/) );
5348         return;
5349      }
5350      break;
5351   }
5352
5353   /* --------- PUT --------- */
5354   case Ist_Put: {
5355      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5356      if (ty == Ity_I8  || ty == Ity_I16 ||
5357          ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
5358         HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data);
5359         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
5360                                          GuestStatePtr(mode64) );
5361         addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)),
5362                                       am_addr, r_src, mode64 ));
5363         return;
5364      }
5365      if (!mode64 && ty == Ity_I64) {
5366         HReg rHi, rLo;
5367         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
5368                                           GuestStatePtr(mode64) );
5369         PPCAMode* am_addr4 = advance4(env, am_addr);
5370         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data);
5371         addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
5372         addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
5373         return;
5374     }
5375     if (ty == Ity_V128) {
5376         /* Guest state vectors are 16byte aligned,
5377            so don't need to worry here */
5378         HReg v_src = iselVecExpr(env, stmt->Ist.Put.data);
5379         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
5380                                           GuestStatePtr(mode64) );
5381         addInstr(env,
5382                  PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
5383         return;
5384      }
5385      if (ty == Ity_F64) {
5386         HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data);
5387         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
5388                                          GuestStatePtr(mode64) );
5389         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
5390                                        fr_src, am_addr ));
5391         return;
5392      }
5393      if (ty == Ity_D32) {
5394         /* The 32-bit value is stored in a 64-bit register */
5395         HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data );
5396         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
5397                                          GuestStatePtr(mode64) );
5398         addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8,
5399                                         fr_src, am_addr ) );
5400         return;
5401      }
5402      if (ty == Ity_D64) {
5403         HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data );
5404         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
5405                                          GuestStatePtr(mode64) );
5406         addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8, fr_src, am_addr ) );
5407         return;
5408      }
5409      break;
5410   }
5411
5412   /* --------- Indexed PUT --------- */
5413   case Ist_PutI: {
5414      IRPutI *puti = stmt->Ist.PutI.details;
5415
5416      PPCAMode* dst_am
5417         = genGuestArrayOffset(
5418              env, puti->descr,
5419                   puti->ix, puti->bias );
5420      IRType ty = typeOfIRExpr(env->type_env, puti->data);
5421      if (mode64 && ty == Ity_I64) {
5422         HReg r_src = iselWordExpr_R(env, puti->data);
5423         addInstr(env, PPCInstr_Store( toUChar(8),
5424                                       dst_am, r_src, mode64 ));
5425         return;
5426      }
5427      if ((!mode64) && ty == Ity_I32) {
5428         HReg r_src = iselWordExpr_R(env, puti->data);
5429         addInstr(env, PPCInstr_Store( toUChar(4),
5430                                       dst_am, r_src, mode64 ));
5431         return;
5432      }
5433      break;
5434   }
5435
5436   /* --------- TMP --------- */
5437   case Ist_WrTmp: {
5438      IRTemp tmp = stmt->Ist.WrTmp.tmp;
5439      IRType ty = typeOfIRTemp(env->type_env, tmp);
5440      if (ty == Ity_I8  || ty == Ity_I16 ||
5441          ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
5442         HReg r_dst = lookupIRTemp(env, tmp);
5443         HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data);
5444         addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
5445         return;
5446      }
5447      if (!mode64 && ty == Ity_I64) {
5448         HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
5449
5450         iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data);
5451         lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
5452         addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
5453         addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
5454         return;
5455      }
5456      if (mode64 && ty == Ity_I128) {
5457         HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
5458         iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data);
5459         lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
5460         addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
5461         addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
5462         return;
5463      }
5464      if (!mode64 && ty == Ity_I128) {
5465         HReg r_srcHi, r_srcMedHi, r_srcMedLo, r_srcLo;
5466         HReg r_dstHi, r_dstMedHi, r_dstMedLo, r_dstLo;
5467
5468         iselInt128Expr_to_32x4(&r_srcHi, &r_srcMedHi,
5469                                &r_srcMedLo, &r_srcLo,
5470                                env, stmt->Ist.WrTmp.data);
5471
5472         lookupIRTempQuad( &r_dstHi, &r_dstMedHi, &r_dstMedLo,
5473                           &r_dstLo, env, tmp);
5474
5475         addInstr(env, mk_iMOVds_RR(r_dstHi,    r_srcHi) );
5476         addInstr(env, mk_iMOVds_RR(r_dstMedHi, r_srcMedHi) );
5477         addInstr(env, mk_iMOVds_RR(r_dstMedLo, r_srcMedLo) );
5478         addInstr(env, mk_iMOVds_RR(r_dstLo,    r_srcLo) );
5479         return;
5480      }
5481      if (ty == Ity_I1) {
5482         PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5483         HReg r_dst = lookupIRTemp(env, tmp);
5484         addInstr(env, PPCInstr_Set(cond, r_dst));
5485         return;
5486      }
5487      if (ty == Ity_F64) {
5488         HReg fr_dst = lookupIRTemp(env, tmp);
5489         HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5490         addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
5491         return;
5492      }
5493      if (ty == Ity_F32) {
5494         HReg fr_dst = lookupIRTemp(env, tmp);
5495         HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5496         addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
5497         return;
5498      }
5499      if (ty == Ity_D32) {
5500         HReg fr_dst = lookupIRTemp(env, tmp);
5501         HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data);
5502         addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
5503         return;
5504      }
5505      if (ty == Ity_V128) {
5506         HReg v_dst = lookupIRTemp(env, tmp);
5507         HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data);
5508         addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
5509         return;
5510      }
5511      if (ty == Ity_D64) {
5512         HReg fr_dst = lookupIRTemp( env, tmp );
5513         HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data );
5514         addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dst, fr_src ) );
5515         return;
5516      }
5517      if (ty == Ity_D128) {
5518         HReg fr_srcHi, fr_srcLo, fr_dstHi, fr_dstLo;
5519	 //         lookupDfp128IRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
5520         lookupIRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
5521         iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data );
5522         addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstHi, fr_srcHi ) );
5523         addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstLo, fr_srcLo ) );
5524         return;
5525      }
5526      break;
5527   }
5528
5529   /* --------- Load Linked or Store Conditional --------- */
5530   case Ist_LLSC: {
5531      IRTemp res    = stmt->Ist.LLSC.result;
5532      IRType tyRes  = typeOfIRTemp(env->type_env, res);
5533      IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
5534
5535      if (stmt->Ist.LLSC.end != Iend_BE)
5536         goto stmt_fail;
5537      if (!mode64 && (tyAddr != Ity_I32))
5538         goto stmt_fail;
5539      if (mode64 && (tyAddr != Ity_I64))
5540         goto stmt_fail;
5541
5542      if (stmt->Ist.LLSC.storedata == NULL) {
5543         /* LL */
5544         HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr );
5545         HReg r_dst  = lookupIRTemp(env, res);
5546         if (tyRes == Ity_I32) {
5547            addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
5548            return;
5549         }
5550         if (tyRes == Ity_I64 && mode64) {
5551            addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
5552            return;
5553         }
5554         /* fallthru */;
5555      } else {
5556         /* SC */
5557         HReg   r_res  = lookupIRTemp(env, res); /* :: Ity_I1 */
5558         HReg   r_a    = iselWordExpr_R(env, stmt->Ist.LLSC.addr);
5559         HReg   r_src  = iselWordExpr_R(env, stmt->Ist.LLSC.storedata);
5560         HReg   r_tmp  = newVRegI(env);
5561         IRType tyData = typeOfIRExpr(env->type_env,
5562                                      stmt->Ist.LLSC.storedata);
5563         vassert(tyRes == Ity_I1);
5564         if (tyData == Ity_I32 || (tyData == Ity_I64 && mode64)) {
5565            addInstr(env, PPCInstr_StoreC( tyData==Ity_I32 ? 4 : 8,
5566                                           r_a, r_src, mode64 ));
5567            addInstr(env, PPCInstr_MfCR( r_tmp ));
5568            addInstr(env, PPCInstr_Shft(
5569                             Pshft_SHR,
5570                             env->mode64 ? False : True
5571                                /*F:64-bit, T:32-bit shift*/,
5572                             r_tmp, r_tmp,
5573                             PPCRH_Imm(False/*unsigned*/, 29)));
5574            /* Probably unnecessary, since the IR dest type is Ity_I1,
5575               and so we are entitled to leave whatever junk we like
5576               drifting round in the upper 31 or 63 bits of r_res.
5577               However, for the sake of conservativeness .. */
5578            addInstr(env, PPCInstr_Alu(
5579                             Palu_AND,
5580                             r_res, r_tmp,
5581                             PPCRH_Imm(False/*signed*/, 1)));
5582            return;
5583         }
5584         /* fallthru */
5585      }
5586      goto stmt_fail;
5587      /*NOTREACHED*/
5588   }
5589
5590   /* --------- Call to DIRTY helper --------- */
5591   case Ist_Dirty: {
5592      IRDirty* d = stmt->Ist.Dirty.details;
5593
5594      /* Figure out the return type, if any. */
5595      IRType retty = Ity_INVALID;
5596      if (d->tmp != IRTemp_INVALID)
5597         retty = typeOfIRTemp(env->type_env, d->tmp);
5598
5599      /* Throw out any return types we don't know about. */
5600      Bool retty_ok = False;
5601      if (mode64) {
5602         switch (retty) {
5603            case Ity_INVALID: /* function doesn't return anything */
5604            case Ity_V128:
5605            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
5606               retty_ok = True; break;
5607            default:
5608               break;
5609         }
5610      } else {
5611         switch (retty) {
5612            case Ity_INVALID: /* function doesn't return anything */
5613            case Ity_V128:
5614            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
5615               retty_ok = True; break;
5616            default:
5617               break;
5618         }
5619      }
5620      if (!retty_ok)
5621         break; /* will go to stmt_fail: */
5622
5623      /* Marshal args, do the call, clear stack, set the return value
5624         to 0x555..555 if this is a conditional call that returns a
5625         value and the call is skipped. */
5626      UInt   addToSp = 0;
5627      RetLoc rloc    = mk_RetLoc_INVALID();
5628      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
5629      vassert(is_sane_RetLoc(rloc));
5630
5631      /* Now figure out what to do with the returned value, if any. */
5632      switch (retty) {
5633         case Ity_INVALID: {
5634            /* No return value.  Nothing to do. */
5635            vassert(d->tmp == IRTemp_INVALID);
5636            vassert(rloc.pri == RLPri_None);
5637            vassert(addToSp == 0);
5638            return;
5639         }
5640         case Ity_I32: case Ity_I16: case Ity_I8: {
5641            /* The returned value is in %r3.  Park it in the register
5642               associated with tmp. */
5643            HReg r_dst = lookupIRTemp(env, d->tmp);
5644            addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
5645            vassert(rloc.pri == RLPri_Int);
5646            vassert(addToSp == 0);
5647            return;
5648         }
5649         case Ity_I64:
5650            if (mode64) {
5651               /* The returned value is in %r3.  Park it in the register
5652                  associated with tmp. */
5653               HReg r_dst = lookupIRTemp(env, d->tmp);
5654               addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
5655               vassert(rloc.pri == RLPri_Int);
5656               vassert(addToSp == 0);
5657            } else {
5658               /* The returned value is in %r3:%r4.  Park it in the
5659                  register-pair associated with tmp. */
5660               HReg r_dstHi = INVALID_HREG;
5661               HReg r_dstLo = INVALID_HREG;
5662               lookupIRTempPair( &r_dstHi, &r_dstLo, env, d->tmp);
5663               addInstr(env, mk_iMOVds_RR(r_dstHi, hregPPC_GPR3(mode64)));
5664               addInstr(env, mk_iMOVds_RR(r_dstLo, hregPPC_GPR4(mode64)));
5665               vassert(rloc.pri == RLPri_2Int);
5666               vassert(addToSp == 0);
5667            }
5668            return;
5669         case Ity_V128: {
5670            /* The returned value is on the stack, and *retloc tells
5671               us where.  Fish it off the stack and then move the
5672               stack pointer upwards to clear it, as directed by
5673               doHelperCall. */
5674            vassert(rloc.pri == RLPri_V128SpRel);
5675            vassert(addToSp >= 16);
5676            HReg      dst = lookupIRTemp(env, d->tmp);
5677            PPCAMode* am  = PPCAMode_IR(rloc.spOff, StackFramePtr(mode64));
5678            addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, dst, am ));
5679            add_to_sp(env, addToSp);
5680            return;
5681         }
5682         default:
5683            /*NOTREACHED*/
5684            vassert(0);
5685      }
5686   }
5687
5688   /* --------- MEM FENCE --------- */
5689   case Ist_MBE:
5690      switch (stmt->Ist.MBE.event) {
5691         case Imbe_Fence:
5692            addInstr(env, PPCInstr_MFence());
5693            return;
5694         default:
5695            break;
5696      }
5697      break;
5698
5699   /* --------- INSTR MARK --------- */
5700   /* Doesn't generate any executable code ... */
5701   case Ist_IMark:
5702       return;
5703
5704   /* --------- ABI HINT --------- */
5705   /* These have no meaning (denotation in the IR) and so we ignore
5706      them ... if any actually made it this far. */
5707   case Ist_AbiHint:
5708       return;
5709
5710   /* --------- NO-OP --------- */
5711   /* Fairly self-explanatory, wouldn't you say? */
5712   case Ist_NoOp:
5713       return;
5714
5715   /* --------- EXIT --------- */
5716   case Ist_Exit: {
5717      IRConst* dst = stmt->Ist.Exit.dst;
5718      if (!mode64 && dst->tag != Ico_U32)
5719         vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
5720      if (mode64 && dst->tag != Ico_U64)
5721         vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
5722
5723      PPCCondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
5724      PPCAMode*   amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
5725                                      hregPPC_GPR31(mode64));
5726
5727      /* Case: boring transfer to known address */
5728      if (stmt->Ist.Exit.jk == Ijk_Boring
5729          || stmt->Ist.Exit.jk == Ijk_Call
5730          /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
5731         if (env->chainingAllowed) {
5732            /* .. almost always true .. */
5733            /* Skip the event check at the dst if this is a forwards
5734               edge. */
5735            Bool toFastEP
5736               = mode64
5737               ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
5738               : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
5739            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
5740            addInstr(env, PPCInstr_XDirect(
5741                             mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
5742                                    : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
5743                             amCIA, cc, toFastEP));
5744         } else {
5745            /* .. very occasionally .. */
5746            /* We can't use chaining, so ask for an assisted transfer,
5747               as that's the only alternative that is allowable. */
5748            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
5749            addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
5750         }
5751         return;
5752      }
5753
5754      /* Case: assisted transfer to arbitrary address */
5755      switch (stmt->Ist.Exit.jk) {
5756         /* Keep this list in sync with that in iselNext below */
5757         case Ijk_ClientReq:
5758         case Ijk_EmFail:
5759         case Ijk_EmWarn:
5760         case Ijk_NoDecode:
5761         case Ijk_NoRedir:
5762         case Ijk_SigBUS:
5763         case Ijk_SigTRAP:
5764         case Ijk_Sys_syscall:
5765         case Ijk_InvalICache:
5766         {
5767            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
5768            addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
5769                                             stmt->Ist.Exit.jk));
5770            return;
5771         }
5772         default:
5773            break;
5774      }
5775
5776      /* Do we ever expect to see any other kind? */
5777      goto stmt_fail;
5778   }
5779
5780   default: break;
5781   }
5782  stmt_fail:
5783   ppIRStmt(stmt);
5784   vpanic("iselStmt(ppc)");
5785}
5786
5787
5788/*---------------------------------------------------------*/
5789/*--- ISEL: Basic block terminators (Nexts)             ---*/
5790/*---------------------------------------------------------*/
5791
5792static void iselNext ( ISelEnv* env,
5793                       IRExpr* next, IRJumpKind jk, Int offsIP )
5794{
5795   if (vex_traceflags & VEX_TRACE_VCODE) {
5796      vex_printf( "\n-- PUT(%d) = ", offsIP);
5797      ppIRExpr( next );
5798      vex_printf( "; exit-");
5799      ppIRJumpKind(jk);
5800      vex_printf( "\n");
5801   }
5802
5803   PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
5804
5805   /* Case: boring transfer to known address */
5806   if (next->tag == Iex_Const) {
5807      IRConst* cdst = next->Iex.Const.con;
5808      vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32));
5809      if (jk == Ijk_Boring || jk == Ijk_Call) {
5810         /* Boring transfer to known address */
5811         PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
5812         if (env->chainingAllowed) {
5813            /* .. almost always true .. */
5814            /* Skip the event check at the dst if this is a forwards
5815               edge. */
5816            Bool toFastEP
5817               = env->mode64
5818               ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
5819               : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
5820            if (0) vex_printf("%s", toFastEP ? "X" : ".");
5821            addInstr(env, PPCInstr_XDirect(
5822                             env->mode64 ? (Addr64)cdst->Ico.U64
5823                                         : (Addr64)cdst->Ico.U32,
5824                             amCIA, always, toFastEP));
5825         } else {
5826            /* .. very occasionally .. */
5827            /* We can't use chaining, so ask for an assisted transfer,
5828               as that's the only alternative that is allowable. */
5829            HReg r = iselWordExpr_R(env, next);
5830            addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
5831                                             Ijk_Boring));
5832         }
5833         return;
5834      }
5835   }
5836
5837   /* Case: call/return (==boring) transfer to any address */
5838   switch (jk) {
5839      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
5840         HReg       r     = iselWordExpr_R(env, next);
5841         PPCAMode*  amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
5842         if (env->chainingAllowed) {
5843            addInstr(env, PPCInstr_XIndir(r, amCIA, always));
5844         } else {
5845            addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
5846                                             Ijk_Boring));
5847         }
5848         return;
5849      }
5850      default:
5851         break;
5852   }
5853
5854   /* Case: assisted transfer to arbitrary address */
5855   switch (jk) {
5856      /* Keep this list in sync with that for Ist_Exit above */
5857      case Ijk_ClientReq:
5858      case Ijk_EmFail:
5859      case Ijk_EmWarn:
5860      case Ijk_NoDecode:
5861      case Ijk_NoRedir:
5862      case Ijk_SigBUS:
5863      case Ijk_SigTRAP:
5864      case Ijk_Sys_syscall:
5865      case Ijk_InvalICache:
5866      {
5867         HReg      r     = iselWordExpr_R(env, next);
5868         PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
5869         addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
5870         return;
5871      }
5872      default:
5873         break;
5874   }
5875
5876   vex_printf( "\n-- PUT(%d) = ", offsIP);
5877   ppIRExpr( next );
5878   vex_printf( "; exit-");
5879   ppIRJumpKind(jk);
5880   vex_printf( "\n");
5881   vassert(0); // are we expecting any other kind?
5882}
5883
5884
5885/*---------------------------------------------------------*/
5886/*--- Insn selector top-level                           ---*/
5887/*---------------------------------------------------------*/
5888
5889/* Translate an entire SB to ppc code. */
5890HInstrArray* iselSB_PPC ( IRSB* bb,
5891                          VexArch      arch_host,
5892                          VexArchInfo* archinfo_host,
5893                          VexAbiInfo*  vbi,
5894                          Int offs_Host_EvC_Counter,
5895                          Int offs_Host_EvC_FailAddr,
5896                          Bool chainingAllowed,
5897                          Bool addProfInc,
5898                          Addr64 max_ga )
5899{
5900   Int       i, j;
5901   HReg      hregLo, hregMedLo, hregMedHi, hregHi;
5902   ISelEnv*  env;
5903   UInt      hwcaps_host = archinfo_host->hwcaps;
5904   Bool      mode64 = False;
5905   UInt      mask32, mask64;
5906   PPCAMode *amCounter, *amFailAddr;
5907
5908
5909   vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
5910   mode64 = arch_host == VexArchPPC64;
5911   if (!mode64) vassert(max_ga <= 0xFFFFFFFFULL);
5912
5913   /* do some sanity checks */
5914   mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
5915            | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
5916            | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
5917
5918
5919   mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
5920            | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
5921            | VEX_HWCAPS_PPC64_ISA2_07;
5922
5923   if (mode64) {
5924      vassert((hwcaps_host & mask32) == 0);
5925   } else {
5926      vassert((hwcaps_host & mask64) == 0);
5927   }
5928
5929   /* Make up an initial environment to use. */
5930   env = LibVEX_Alloc(sizeof(ISelEnv));
5931   env->vreg_ctr = 0;
5932
5933   /* Are we being ppc32 or ppc64? */
5934   env->mode64 = mode64;
5935
5936   /* Set up output code array. */
5937   env->code = newHInstrArray();
5938
5939   /* Copy BB's type env. */
5940   env->type_env = bb->tyenv;
5941
5942   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
5943    * change as we go along.
5944    *
5945    * vregmap2 and vregmap3 are only used in 32 bit mode
5946    * for supporting I128 in 32-bit mode
5947    */
5948   env->n_vregmap = bb->tyenv->types_used;
5949   env->vregmapLo    = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5950   env->vregmapMedLo = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5951   if (mode64) {
5952      env->vregmapMedHi = NULL;
5953      env->vregmapHi    = NULL;
5954   } else {
5955      env->vregmapMedHi = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5956      env->vregmapHi    = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5957   }
5958
5959   /* and finally ... */
5960   env->chainingAllowed = chainingAllowed;
5961   env->max_ga          = max_ga;
5962   env->hwcaps          = hwcaps_host;
5963   env->previous_rm     = NULL;
5964   env->vbi             = vbi;
5965
5966   /* For each IR temporary, allocate a suitably-kinded virtual
5967      register. */
5968   j = 0;
5969   for (i = 0; i < env->n_vregmap; i++) {
5970      hregLo = hregMedLo = hregMedHi = hregHi = INVALID_HREG;
5971      switch (bb->tyenv->types[i]) {
5972      case Ity_I1:
5973      case Ity_I8:
5974      case Ity_I16:
5975      case Ity_I32:
5976         if (mode64) { hregLo    = mkHReg(j++, HRcInt64,  True); break;
5977         } else {      hregLo    = mkHReg(j++, HRcInt32,  True); break;
5978         }
5979      case Ity_I64:
5980         if (mode64) { hregLo    = mkHReg(j++, HRcInt64,  True); break;
5981         } else {      hregLo    = mkHReg(j++, HRcInt32,  True);
5982         hregMedLo = mkHReg(j++, HRcInt32,  True); break;
5983         }
5984      case Ity_I128:
5985         if (mode64) { hregLo    = mkHReg(j++, HRcInt64,  True);
5986         hregMedLo = mkHReg(j++, HRcInt64,  True); break;
5987         } else {      hregLo    = mkHReg(j++, HRcInt32,  True);
5988         hregMedLo = mkHReg(j++, HRcInt32,  True);
5989         hregMedHi = mkHReg(j++, HRcInt32,  True);
5990         hregHi    = mkHReg(j++, HRcInt32,  True); break;
5991         }
5992      case Ity_F32:
5993      case Ity_F64:    hregLo    = mkHReg(j++, HRcFlt64,  True); break;
5994      case Ity_V128:   hregLo    = mkHReg(j++, HRcVec128, True); break;
5995      case Ity_D32:
5996      case Ity_D64:    hregLo    = mkHReg(j++, HRcFlt64,  True); break;
5997      case Ity_D128:   hregLo    = mkHReg(j++, HRcFlt64,  True);
5998      hregMedLo = mkHReg(j++, HRcFlt64,  True); break;
5999      default:
6000         ppIRType(bb->tyenv->types[i]);
6001         vpanic("iselBB(ppc): IRTemp type");
6002      }
6003      env->vregmapLo[i]    = hregLo;
6004      env->vregmapMedLo[i] = hregMedLo;
6005      if (!mode64) {
6006         env->vregmapMedHi[i] = hregMedHi;
6007         env->vregmapHi[i]    = hregHi;
6008      }
6009   }
6010   env->vreg_ctr = j;
6011
6012   /* The very first instruction must be an event check. */
6013   amCounter  = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
6014   amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
6015   addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
6016
6017   /* Possibly a block counter increment (for profiling).  At this
6018      point we don't know the address of the counter, so just pretend
6019      it is zero.  It will have to be patched later, but before this
6020      translation is used, by a call to LibVEX_patchProfCtr. */
6021   if (addProfInc) {
6022      addInstr(env, PPCInstr_ProfInc());
6023   }
6024
6025   /* Ok, finally we can iterate over the statements. */
6026   for (i = 0; i < bb->stmts_used; i++)
6027      iselStmt(env, bb->stmts[i]);
6028
6029   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6030
6031   /* record the number of vregs we used. */
6032   env->code->n_vregs = env->vreg_ctr;
6033   return env->code;
6034}
6035
6036
6037/*---------------------------------------------------------------*/
6038/*--- end                                     host_ppc_isel.c ---*/
6039/*---------------------------------------------------------------*/
6040