1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_ppc_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2017 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39
40#include "ir_match.h"
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"
45#include "host_ppc_defs.h"
46
/* Host register class used for general-purpose integer registers:
   HRcInt64 when selecting for a 64-bit host, HRcInt32 otherwise. */
#define HRcGPR(_is64) ((_is64) ? HRcInt64 : HRcInt32)
49
50
51/*---------------------------------------------------------*/
52/*--- Register Usage Conventions                        ---*/
53/*---------------------------------------------------------*/
54/*
55  Integer Regs
56  ------------
57  GPR0       Reserved
58  GPR1       Stack Pointer
59  GPR2       not used - TOC pointer
60  GPR3:10    Allocateable
61  GPR11      if mode64: not used - calls by ptr / env ptr for some langs
62  GPR12      if mode64: not used - exceptions / global linkage code
63  GPR13      not used - Thread-specific pointer
64  GPR14:28   Allocateable
65  GPR29      Unused by us (reserved for the dispatcher)
66  GPR30      AltiVec temp spill register
67  GPR31      GuestStatePointer
68
69  Of Allocateable regs:
70  if (mode64)
71    GPR3:10  Caller-saved regs
72  else
73    GPR3:12  Caller-saved regs
74  GPR14:29   Callee-saved regs
75
76  GPR3       [Return | Parameter] - carrying reg
77  GPR4:10    Parameter-carrying regs
78
79
80  Floating Point Regs
81  -------------------
82  FPR0:31    Allocateable
83
84  FPR0       Caller-saved - scratch reg
85  if (mode64)
86    FPR1:13  Caller-saved - param & return regs
87  else
88    FPR1:8   Caller-saved - param & return regs
89    FPR9:13  Caller-saved regs
90  FPR14:31   Callee-saved regs
91
92
93  Vector Regs (on processors with the VMX feature)
94  -----------
95  VR0-VR1    Volatile scratch registers
96  VR2-VR13   Volatile vector parameters registers
97  VR14-VR19  Volatile scratch registers
98  VR20-VR31  Non-volatile registers
99  VRSAVE     Non-volatile 32-bit register
100*/
101
102
103/*---------------------------------------------------------*/
104/*--- PPC FP Status & Control Register Conventions      ---*/
105/*---------------------------------------------------------*/
106/*
107  Vex-generated code expects to run with the FPU set as follows: all
108  exceptions masked.  The rounding mode is set appropriately before
109  each floating point insn emitted (or left unchanged if known to be
110  correct already).  There are a few fp insns (fmr,fneg,fabs,fnabs),
111  which are unaffected by the rm and so the rounding mode is not set
112  prior to them.
113
114  At least on MPC7447A (Mac Mini), frsqrte is also not affected by
115  rounding mode.  At some point the ppc docs get sufficiently vague
116  that the only way to find out is to write test programs.
117*/
118/* Notes on the FP instruction set, 6 Feb 06.
119
120What                 exns -> CR1 ?   Sets FPRF ?   Observes RM ?
121-------------------------------------------------------------
122
123fmr[.]                   if .             n             n
124fneg[.]                  if .             n             n
125fabs[.]                  if .             n             n
126fnabs[.]                 if .             n             n
127
128fadd[.]                  if .             y             y
129fadds[.]                 if .             y             y
130fcfid[.] (Si64->dbl)     if .             y             y
131fcfidU[.] (Ui64->dbl)    if .             y             y
132fcfids[.] (Si64->sngl)   if .             Y             Y
133fcfidus[.] (Ui64->sngl)  if .             Y             Y
134fcmpo (cmp, result       n                n             n
135fcmpu  to crfD)          n                n             n
136fctid[.]  (dbl->i64)     if .       ->undef             y
137fctidz[.] (dbl->i64)     if .       ->undef    rounds-to-zero
138fctiw[.]  (dbl->i32)     if .       ->undef             y
139fctiwz[.] (dbl->i32)     if .       ->undef    rounds-to-zero
140fdiv[.]                  if .             y             y
141fdivs[.]                 if .             y             y
142fmadd[.]                 if .             y             y
143fmadds[.]                if .             y             y
144fmsub[.]                 if .             y             y
145fmsubs[.]                if .             y             y
146fmul[.]                  if .             y             y
147fmuls[.]                 if .             y             y
148
149(note: for fnm*, rounding happens before final negation)
150fnmadd[.]                if .             y             y
151fnmadds[.]               if .             y             y
152fnmsub[.]                if .             y             y
153fnmsubs[.]               if .             y             y
154
155fre[.]                   if .             y             y
156fres[.]                  if .             y             y
157
158frsqrte[.]               if .             y       apparently not
159
160fsqrt[.]                 if .             y             y
161fsqrts[.]                if .             y             y
162fsub[.]                  if .             y             y
163fsubs[.]                 if .             y             y
164
165
166fpscr: bits 30-31 (ibm) is RM
167            24-29 (ibm) are exnmasks/non-IEEE bit, all zero
168	    15-19 (ibm) is FPRF: class, <, =, >, UNord
169
170ppc fe(guest) makes fpscr read as all zeros except RM (and maybe FPRF
171in future)
172
173mcrfs     - move fpscr field to CR field
174mtfsfi[.] - 4 bit imm moved to fpscr field
175mtfsf[.]  - move frS[low 1/2] to fpscr but using 8-bit field mask
176mtfsb1[.] - set given fpscr bit
177mtfsb0[.] - clear given fpscr bit
178mffs[.]   - move all fpscr to frD[low 1/2]
179
180For [.] presumably cr1 is set with exn summary bits, as per
181main FP insns
182
183A single precision store truncates/denormalises the in-register value,
184but does not round it.  This is so that flds followed by fsts is
185always the identity.
186*/
187
188
189/*---------------------------------------------------------*/
190/*--- misc helpers                                      ---*/
191/*---------------------------------------------------------*/
192
193/* These are duplicated in guest-ppc/toIR.c */
194static IRExpr* unop ( IROp op, IRExpr* a )
195{
196   return IRExpr_Unop(op, a);
197}
198
199static IRExpr* mkU32 ( UInt i )
200{
201   return IRExpr_Const(IRConst_U32(i));
202}
203
204static IRExpr* bind ( Int binder )
205{
206   return IRExpr_Binder(binder);
207}
208
209static Bool isZeroU8 ( IRExpr* e )
210{
211   return e->tag == Iex_Const
212          && e->Iex.Const.con->tag == Ico_U8
213          && e->Iex.Const.con->Ico.U8 == 0;
214}
215
216
217/*---------------------------------------------------------*/
218/*--- ISelEnv                                           ---*/
219/*---------------------------------------------------------*/
220
221/* This carries around:
222
223   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
224     might encounter.  This is computed before insn selection starts,
225     and does not change.
226
227   - A mapping from IRTemp to HReg.  This tells the insn selector
228     which virtual register(s) are associated with each IRTemp
229     temporary.  This is computed before insn selection starts, and
230     does not change.  We expect this mapping to map precisely the
231     same set of IRTemps as the type mapping does.
232
233         - vregmapLo    holds the primary register for the IRTemp.
234         - vregmapMedLo holds the secondary register for the IRTemp,
235              if any is needed.  That's only for Ity_I64 temps
236              in 32 bit mode or Ity_I128 temps in 64-bit mode.
237         - vregmapMedHi is only for dealing with Ity_I128 temps in
238              32 bit mode.  It holds bits 95:64 (Intel numbering)
239              of the IRTemp.
240         - vregmapHi is also only for dealing with Ity_I128 temps
241              in 32 bit mode.  It holds the most significant bits
242              (127:96 in Intel numbering) of the IRTemp.
243
244    - The code array, that is, the insns selected so far.
245
246    - A counter, for generating new virtual registers.
247
248    - The host subarchitecture we are selecting insns for.
249      This is set at the start and does not change.
250
251    - A Bool to tell us if the host is 32 or 64bit.
252      This is set at the start and does not change.
253
254    - An IRExpr*, which may be NULL, holding the IR expression (an
255      IRRoundingMode-encoded value) to which the FPU's rounding mode
256      was most recently set.  Setting to NULL is always safe.  Used to
257      avoid redundant settings of the FPU's rounding mode, as
258      described in set_FPU_rounding_mode below.
259
    - A VexAbiInfo* (the vbi field), needed for knowing how to generate
      function calls for this target.
262
263    - The maximum guest address of any guest insn in this block.
264      Actually, the address of the highest-addressed byte from any
265      insn in this block.  Is set at the start and does not change.
266      This is used for detecting jumps which are definitely
267      forward-edges from this block, and therefore can be made
268      (chained) to the fast entry point of the destination, thereby
269      avoiding the destination's event check.
270*/
271
272typedef
273   struct {
274      /* Constant -- are set at the start and do not change. */
275      IRTypeEnv* type_env;
276                              //    64-bit mode              32-bit mode
277      HReg*    vregmapLo;     // Low 64-bits [63:0]    Low 32-bits     [31:0]
278      HReg*    vregmapMedLo;  // high 64-bits[127:64]  Next 32-bits    [63:32]
279      HReg*    vregmapMedHi;  // unused                Next 32-bits    [95:64]
280      HReg*    vregmapHi;     // unused                highest 32-bits [127:96]
281      Int      n_vregmap;
282
283      /* 27 Jan 06: Not currently used, but should be */
284      UInt         hwcaps;
285
286      Bool         mode64;
287
288      const VexAbiInfo*  vbi;   // unused
289
290      Bool         chainingAllowed;
291      Addr64       max_ga;
292
293      /* These are modified as we go along. */
294      HInstrArray* code;
295      Int          vreg_ctr;
296
297      IRExpr*      previous_rm;
298   }
299   ISelEnv;
300
301
302static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
303{
304   vassert(tmp >= 0);
305   vassert(tmp < env->n_vregmap);
306   return env->vregmapLo[tmp];
307}
308
309static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
310                               ISelEnv* env, IRTemp tmp )
311{
312   vassert(tmp >= 0);
313   vassert(tmp < env->n_vregmap);
314   vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
315   *vrLO = env->vregmapLo[tmp];
316   *vrHI = env->vregmapMedLo[tmp];
317}
318
319/* Only for used in 32-bit mode */
320static void lookupIRTempQuad ( HReg* vrHi, HReg* vrMedHi, HReg* vrMedLo,
321                               HReg* vrLo, ISelEnv* env, IRTemp tmp )
322{
323   vassert(!env->mode64);
324   vassert(tmp >= 0);
325   vassert(tmp < env->n_vregmap);
326   vassert(! hregIsInvalid(env->vregmapMedLo[tmp]));
327   *vrHi    = env->vregmapHi[tmp];
328   *vrMedHi = env->vregmapMedHi[tmp];
329   *vrMedLo = env->vregmapMedLo[tmp];
330   *vrLo    = env->vregmapLo[tmp];
331}
332
333static void addInstr ( ISelEnv* env, PPCInstr* instr )
334{
335   addHInstr(env->code, instr);
336   if (vex_traceflags & VEX_TRACE_VCODE) {
337      ppPPCInstr(instr, env->mode64);
338      vex_printf("\n");
339   }
340}
341
342static HReg newVRegI ( ISelEnv* env )
343{
344   HReg reg
345      = mkHReg(True/*vreg*/, HRcGPR(env->mode64), 0/*enc*/, env->vreg_ctr);
346   env->vreg_ctr++;
347   return reg;
348}
349
350static HReg newVRegF ( ISelEnv* env )
351{
352   HReg reg = mkHReg(True/*vreg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
353   env->vreg_ctr++;
354   return reg;
355}
356
357static HReg newVRegV ( ISelEnv* env )
358{
359   HReg reg = mkHReg(True/*vreg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
360   env->vreg_ctr++;
361   return reg;
362}
363
364
365/*---------------------------------------------------------*/
366/*--- ISEL: Forward declarations                        ---*/
367/*---------------------------------------------------------*/
368
369/* These are organised as iselXXX and iselXXX_wrk pairs.  The
370   iselXXX_wrk do the real work, but are not to be called directly.
371   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
372   checks that all returned registers are virtual.  You should not
373   call the _wrk version directly.
374
375   'Word' refers to the size of the native machine word, that is,
376   32-bit int in 32-bit mode and 64-bit int in 64-bit mode.  '2Word'
377   therefore refers to a double-width (64/128-bit) quantity in two
378   integer registers.
379*/
380/* 32-bit mode: compute an I8/I16/I32 into a GPR.
381   64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
382static HReg          iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
383                                          IREndness IEndianess );
384static HReg          iselWordExpr_R     ( ISelEnv* env, const IRExpr* e,
385                                          IREndness IEndianess );
386
387/* 32-bit mode: Compute an I8/I16/I32 into a RH
388                (reg-or-halfword-immediate).
389   64-bit mode: Compute an I8/I16/I32/I64 into a RH
390                (reg-or-halfword-immediate).
391   It's important to specify whether the immediate is to be regarded
392   as signed or not.  If yes, this will never return -32768 as an
393   immediate; this guaranteed that all signed immediates that are
394   return can have their sign inverted if need be.
395*/
396static PPCRH*        iselWordExpr_RH_wrk ( ISelEnv* env,
397                                           Bool syned, const IRExpr* e,
398                                           IREndness IEndianess );
399static PPCRH*        iselWordExpr_RH     ( ISelEnv* env,
400                                           Bool syned, const IRExpr* e,
401                                           IREndness IEndianess );
402
403/* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
404   64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
405static PPCRI*        iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
406                                           IREndness IEndianess );
407static PPCRI*        iselWordExpr_RI     ( ISelEnv* env, const IRExpr* e,
408                                           IREndness IEndianess );
409
410/* In 32 bit mode ONLY, compute an I8 into a
411   reg-or-5-bit-unsigned-immediate, the latter being an immediate in
412   the range 1 .. 31 inclusive.  Used for doing shift amounts. */
413static PPCRH*        iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
414                                             IREndness IEndianess );
415static PPCRH*        iselWordExpr_RH5u     ( ISelEnv* env, const IRExpr* e,
416                                             IREndness IEndianess );
417
418/* In 64-bit mode ONLY, compute an I8 into a
419   reg-or-6-bit-unsigned-immediate, the latter being an immediate in
420   the range 1 .. 63 inclusive.  Used for doing shift amounts. */
421static PPCRH*        iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
422                                             IREndness IEndianess );
423static PPCRH*        iselWordExpr_RH6u     ( ISelEnv* env, const IRExpr* e,
424                                             IREndness IEndianess );
425
426/* 32-bit mode: compute an I32 into an AMode.
427   64-bit mode: compute an I64 into an AMode.
428
429   Requires to know (xferTy) the type of data to be loaded/stored
430   using this amode.  That is so that, for 64-bit code generation, any
431   PPCAMode_IR returned will have an index (immediate offset) field
432   that is guaranteed to be 4-aligned, if there is any chance that the
433   amode is to be used in ld/ldu/lda/std/stdu.
434
435   Since there are no such restrictions on 32-bit insns, xferTy is
436   ignored for 32-bit code generation. */
437static PPCAMode*     iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
438                                              IRType xferTy,
439                                              IREndness IEndianess );
440static PPCAMode*     iselWordExpr_AMode     ( ISelEnv* env, const IRExpr* e,
441                                              IRType xferTy,
442                                              IREndness IEndianess );
443
444static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
445                                         HReg* rMedLo, HReg* rLo,
446                                         ISelEnv* env, const IRExpr* e,
447                                         IREndness IEndianess );
448static void iselInt128Expr_to_32x4     ( HReg* rHi, HReg* rMedHi,
449                                         HReg* rMedLo, HReg* rLo,
450                                         ISelEnv* env, const IRExpr* e,
451                                         IREndness IEndianess );
452
453
454/* 32-bit mode ONLY: compute an I64 into a GPR pair. */
455static void          iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
456                                         ISelEnv* env, const IRExpr* e,
457                                         IREndness IEndianess );
458static void          iselInt64Expr     ( HReg* rHi, HReg* rLo,
459                                         ISelEnv* env, const IRExpr* e,
460                                         IREndness IEndianess );
461
462/* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
463static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
464                                          ISelEnv* env, const IRExpr* e,
465                                          IREndness IEndianess );
466
467static void          iselInt128Expr     ( HReg* rHi, HReg* rLo,
468                                          ISelEnv* env, const IRExpr* e,
469                                          IREndness IEndianess );
470
471static PPCCondCode   iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
472                                        IREndness IEndianess );
473static PPCCondCode   iselCondCode     ( ISelEnv* env, const IRExpr* e,
474                                        IREndness IEndianess );
475
476static HReg          iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
477                                       IREndness IEndianess );
478static HReg          iselDblExpr     ( ISelEnv* env, const IRExpr* e,
479                                       IREndness IEndianess );
480
481static HReg          iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
482                                       IREndness IEndianess );
483static HReg          iselFltExpr     ( ISelEnv* env, const IRExpr* e,
484                                       IREndness IEndianess );
485
486static HReg          iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
487                                       IREndness IEndianess );
488static HReg          iselVecExpr     ( ISelEnv* env, const IRExpr* e,
489                                       IREndness IEndianess );
490
491/* 64-bit mode ONLY. */
492static HReg          iselDfp32Expr_wrk ( ISelEnv* env, const IRExpr* e,
493                                         IREndness IEndianess );
494static HReg          iselDfp32Expr     ( ISelEnv* env, const IRExpr* e,
495                                         IREndness IEndianess );
496static HReg          iselDfp64Expr_wrk ( ISelEnv* env, const IRExpr* e,
497                                         IREndness IEndianess );
498static HReg          iselDfp64Expr     ( ISelEnv* env, const IRExpr* e,
499                                         IREndness IEndianess );
500static HReg iselFp128Expr_wrk ( ISelEnv* env, const IRExpr* e,
501                                IREndness IEndianess);
502static HReg iselFp128Expr     ( ISelEnv* env, const IRExpr* e,
503                                IREndness IEndianess);
504
505/* 64-bit mode ONLY: compute an D128 into a GPR64 pair. */
506static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
507                                 const IRExpr* e, IREndness IEndianess );
508static void iselDfp128Expr     ( HReg* rHi, HReg* rLo, ISelEnv* env,
509                                 const IRExpr* e, IREndness IEndianess );
510
511/*---------------------------------------------------------*/
512/*--- ISEL: Misc helpers                                ---*/
513/*---------------------------------------------------------*/
514
515/* Make an int reg-reg move. */
516
517static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
518{
519   vassert(hregClass(r_dst) == hregClass(r_src));
520   vassert(hregClass(r_src) ==  HRcInt32 ||
521           hregClass(r_src) ==  HRcInt64);
522   return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
523}
524
525/* Advance/retreat %r1 by n. */
526
527static void add_to_sp ( ISelEnv* env, UInt n )
528{
529   HReg sp = StackFramePtr(env->mode64);
530   vassert(n <= 1024 && (n%16) == 0);
531   addInstr(env, PPCInstr_Alu( Palu_ADD, sp, sp,
532                               PPCRH_Imm(True,toUShort(n)) ));
533}
534
535static void sub_from_sp ( ISelEnv* env, UInt n )
536{
537   HReg sp = StackFramePtr(env->mode64);
538   vassert(n <= 1024 && (n%16) == 0);
539   addInstr(env, PPCInstr_Alu( Palu_SUB, sp, sp,
540                               PPCRH_Imm(True,toUShort(n)) ));
541}
542
543/*
544  returns a quadword aligned address on the stack
545   - copies SP, adds 16bytes, aligns to quadword.
546  use sub_from_sp(32) before calling this,
547  as expects to have 32 bytes to play with.
548*/
549static HReg get_sp_aligned16 ( ISelEnv* env )
550{
551   HReg       r = newVRegI(env);
552   HReg align16 = newVRegI(env);
553   addInstr(env, mk_iMOVds_RR(r, StackFramePtr(env->mode64)));
554   // add 16
555   addInstr(env, PPCInstr_Alu( Palu_ADD, r, r,
556                               PPCRH_Imm(True,toUShort(16)) ));
557   // mask to quadword
558   addInstr(env,
559            PPCInstr_LI(align16, 0xFFFFFFFFFFFFFFF0ULL, env->mode64));
560   addInstr(env, PPCInstr_Alu(Palu_AND, r,r, PPCRH_Reg(align16)));
561   return r;
562}
563
564
565
566/* Load 2*I32 regs to fp reg */
567static HReg mk_LoadRR32toFPR ( ISelEnv* env,
568                               HReg r_srcHi, HReg r_srcLo )
569{
570   HReg fr_dst = newVRegF(env);
571   PPCAMode *am_addr0, *am_addr1;
572
573   vassert(!env->mode64);
574   vassert(hregClass(r_srcHi) == HRcInt32);
575   vassert(hregClass(r_srcLo) == HRcInt32);
576
577   sub_from_sp( env, 16 );        // Move SP down 16 bytes
578   am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
579   am_addr1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
580
581   // store hi,lo as Ity_I32's
582   addInstr(env, PPCInstr_Store( 4, am_addr0, r_srcHi, env->mode64 ));
583   addInstr(env, PPCInstr_Store( 4, am_addr1, r_srcLo, env->mode64 ));
584
585   // load as float
586   addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
587
588   add_to_sp( env, 16 );          // Reset SP
589   return fr_dst;
590}
591
592/* Load I64 reg to fp reg */
593static HReg mk_LoadR64toFPR ( ISelEnv* env, HReg r_src )
594{
595   HReg fr_dst = newVRegF(env);
596   PPCAMode *am_addr0;
597
598   vassert(env->mode64);
599   vassert(hregClass(r_src) == HRcInt64);
600
601   sub_from_sp( env, 16 );        // Move SP down 16 bytes
602   am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
603
604   // store as Ity_I64
605   addInstr(env, PPCInstr_Store( 8, am_addr0, r_src, env->mode64 ));
606
607   // load as float
608   addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
609
610   add_to_sp( env, 16 );          // Reset SP
611   return fr_dst;
612}
613
614
615/* Given an amode, return one which references 4 bytes further
616   along. */
617
618static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
619{
620   PPCAMode* am4 = dopyPPCAMode( am );
621   if (am4->tag == Pam_IR
622       && am4->Pam.IR.index + 4 <= 32767) {
623      am4->Pam.IR.index += 4;
624   } else {
625      vpanic("advance4(ppc,host)");
626   }
627   return am4;
628}
629
630
631/* Given a guest-state array descriptor, an index expression and a
632   bias, generate a PPCAMode pointing at the relevant piece of
633   guest state.  */
634static
635PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
636                                IRExpr* off, Int bias, IREndness IEndianess )
637{
638   HReg rtmp, roff;
639   Int  elemSz = sizeofIRType(descr->elemTy);
640   Int  nElems = descr->nElems;
641   Int  shift  = 0;
642
643   /* Throw out any cases we don't need.  In theory there might be a
644      day where we need to handle others, but not today. */
645
646   if (nElems != 16 && nElems != 32)
647      vpanic("genGuestArrayOffset(ppc host)(1)");
648
649   switch (elemSz) {
650      case 4:  shift = 2; break;
651      case 8:  shift = 3; break;
652      default: vpanic("genGuestArrayOffset(ppc host)(2)");
653   }
654
655   if (bias < -100 || bias > 100) /* somewhat arbitrarily */
656      vpanic("genGuestArrayOffset(ppc host)(3)");
657   if (descr->base < 0 || descr->base > 5000) /* somewhat arbitrarily */
658      vpanic("genGuestArrayOffset(ppc host)(4)");
659
660   /* Compute off into a reg, %off.  Then return:
661
662         addi %tmp, %off, bias (if bias != 0)
663         andi %tmp, nElems-1
664         sldi %tmp, shift
665         addi %tmp, %tmp, base
666         ... Baseblockptr + %tmp ...
667   */
668   roff = iselWordExpr_R(env, off, IEndianess);
669   rtmp = newVRegI(env);
670   addInstr(env, PPCInstr_Alu(
671                    Palu_ADD,
672                    rtmp, roff,
673                    PPCRH_Imm(True/*signed*/, toUShort(bias))));
674   addInstr(env, PPCInstr_Alu(
675                    Palu_AND,
676                    rtmp, rtmp,
677                    PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
678   addInstr(env, PPCInstr_Shft(
679                    Pshft_SHL,
680                    env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
681                    rtmp, rtmp,
682                    PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
683   addInstr(env, PPCInstr_Alu(
684                    Palu_ADD,
685                    rtmp, rtmp,
686                    PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
687   return
688      PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
689}
690
691
692/*---------------------------------------------------------*/
693/*--- ISEL: Function call helpers                       ---*/
694/*---------------------------------------------------------*/
695
696/* Used only in doHelperCall.  See big comment in doHelperCall re
697   handling of register-parameter args.  This function figures out
698   whether evaluation of an expression might require use of a fixed
699   register.  If in doubt return True (safe but suboptimal).
700*/
701static
702Bool mightRequireFixedRegs ( IRExpr* e )
703{
704   switch (e->tag) {
705   case Iex_RdTmp: case Iex_Const: case Iex_Get:
706      return False;
707   default:
708      return True;
709   }
710}
711
712
713/* Do a complete function call.  |guard| is a Ity_Bit expression
714   indicating whether or not the call happens.  If guard==NULL, the
715   call is unconditional.  |retloc| is set to indicate where the
716   return value is after the call.  The caller (of this fn) must
717   generate code to add |stackAdjustAfterCall| to the stack pointer
718   after the call is done. */
719
720static
721void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
722                    /*OUT*/RetLoc* retloc,
723                    ISelEnv* env,
724                    IRExpr* guard,
725                    IRCallee* cee, IRType retTy, IRExpr** args,
726                    IREndness IEndianess)
727{
728   PPCCondCode cc;
729   HReg        argregs[PPC_N_REGPARMS];
730   HReg        tmpregs[PPC_N_REGPARMS];
731   Bool        go_fast;
732   Int         n_args, i, argreg;
733   UInt        argiregs;
734   Bool        mode64 = env->mode64;
735
736   /* Set default returns.  We'll update them later if needed. */
737   *stackAdjustAfterCall = 0;
738   *retloc               = mk_RetLoc_INVALID();
739
740   /* These are used for cross-checking that IR-level constraints on
741      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
742   UInt nVECRETs = 0;
743   UInt nGSPTRs  = 0;
744
745   /* Marshal args for a call and do the call.
746
747      This function only deals with a tiny set of possibilities, which
748      cover all helpers in practice.  The restrictions are that only
749      arguments in registers are supported, hence only PPC_N_REGPARMS x
750      (mode32:32 | mode64:64) integer bits in total can be passed.
751      In fact the only supported arg type is (mode32:I32 | mode64:I64).
752
753      The return type can be I{64,32,16,8} or V{128,256}.  In the
754      latter two cases, it is expected that |args| will contain the
755      special node IRExpr_VECRET(), in which case this routine
756      generates code to allocate space on the stack for the vector
757      return value.  Since we are not passing any scalars on the
758      stack, it is enough to preallocate the return space before
759      marshalling any arguments, in this case.
760
761      |args| may also contain IRExpr_GSPTR(), in which case the value
762      in the guest state pointer register is passed as the
763      corresponding argument.
764
765      Generating code which is both efficient and correct when
766      parameters are to be passed in registers is difficult, for the
767      reasons elaborated in detail in comments attached to
768      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
769      of the method described in those comments.
770
771      The problem is split into two cases: the fast scheme and the
772      slow scheme.  In the fast scheme, arguments are computed
773      directly into the target (real) registers.  This is only safe
774      when we can be sure that computation of each argument will not
775      trash any real registers set by computation of any other
776      argument.
777
778      In the slow scheme, all args are first computed into vregs, and
779      once they are all done, they are moved to the relevant real
780      regs.  This always gives correct code, but it also gives a bunch
781      of vreg-to-rreg moves which are usually redundant but are hard
782      for the register allocator to get rid of.
783
784      To decide which scheme to use, all argument expressions are
785      first examined.  If they are all so simple that it is clear they
786      will be evaluated without use of any fixed registers, use the
787      fast scheme, else use the slow scheme.  Note also that only
788      unconditional calls may use the fast scheme, since having to
789      compute a condition expression could itself trash real
790      registers.
791
792      Note this requires being able to examine an expression and
793      determine whether or not evaluation of it might use a fixed
794      register.  That requires knowledge of how the rest of this insn
795      selector works.  Currently just the following 3 are regarded as
796      safe -- hopefully they cover the majority of arguments in
797      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
798   */
799
800   /* Note that the cee->regparms field is meaningless on PPC32/64 host
801      (since there is only one calling convention) and so we always
802      ignore it. */
803
804   n_args = 0;
805   for (i = 0; args[i]; i++)
806      n_args++;
807
808   if (n_args > PPC_N_REGPARMS) {
809      vpanic("doHelperCall(PPC): cannot currently handle > 8 args");
810      // PPC_N_REGPARMS
811   }
812
813   /* This is kind of stupid .. the arrays are sized as PPC_N_REGPARMS
814      but we then assume that that value is 8. */
815   vassert(PPC_N_REGPARMS == 8);
816
817   argregs[0] = hregPPC_GPR3(mode64);
818   argregs[1] = hregPPC_GPR4(mode64);
819   argregs[2] = hregPPC_GPR5(mode64);
820   argregs[3] = hregPPC_GPR6(mode64);
821   argregs[4] = hregPPC_GPR7(mode64);
822   argregs[5] = hregPPC_GPR8(mode64);
823   argregs[6] = hregPPC_GPR9(mode64);
824   argregs[7] = hregPPC_GPR10(mode64);
825   argiregs = 0;
826
827   tmpregs[0] = tmpregs[1] = tmpregs[2] =
828   tmpregs[3] = tmpregs[4] = tmpregs[5] =
829   tmpregs[6] = tmpregs[7] = INVALID_HREG;
830
831   /* First decide which scheme (slow or fast) is to be used.  First
832      assume the fast scheme, and select slow if any contraindications
833      (wow) appear. */
834
835   go_fast = True;
836
837   /* We'll need space on the stack for the return value.  Avoid
838      possible complications with nested calls by using the slow
839      scheme. */
840   if (retTy == Ity_V128 || retTy == Ity_V256)
841      go_fast = False;
842
843   if (go_fast && guard) {
844      if (guard->tag == Iex_Const
845          && guard->Iex.Const.con->tag == Ico_U1
846          && guard->Iex.Const.con->Ico.U1 == True) {
847         /* unconditional */
848      } else {
849         /* Not manifestly unconditional -- be conservative. */
850         go_fast = False;
851      }
852   }
853
854   if (go_fast) {
855      for (i = 0; i < n_args; i++) {
856         IRExpr* arg = args[i];
857         if (UNLIKELY(arg->tag == Iex_GSPTR)) {
858            /* that's OK */
859         }
860         else if (UNLIKELY(arg->tag == Iex_VECRET)) {
861            /* This implies ill-formed IR, since if the IR was
862               well-formed, the return-type test above would have
863               filtered it out. */
864            vpanic("doHelperCall(PPC): invalid IR");
865         }
866         else if (mightRequireFixedRegs(arg)) {
867            go_fast = False;
868            break;
869         }
870      }
871   }
872
873   /* At this point the scheme to use has been established.  Generate
874      code to get the arg values into the argument rregs. */
875
876   if (go_fast) {
877
878      /* FAST SCHEME */
879      argreg = 0;
880
881      for (i = 0; i < n_args; i++) {
882         IRExpr* arg = args[i];
883         vassert(argreg < PPC_N_REGPARMS);
884
885         if (arg->tag == Iex_GSPTR) {
886            argiregs |= (1 << (argreg+3));
887            addInstr(env, mk_iMOVds_RR( argregs[argreg],
888                                        GuestStatePtr(mode64) ));
889            argreg++;
890         } else {
891            vassert(arg->tag != Iex_VECRET);
892            IRType ty = typeOfIRExpr(env->type_env, arg);
893            vassert(ty == Ity_I32 || ty == Ity_I64);
894            if (!mode64) {
895               if (ty == Ity_I32) {
896                  argiregs |= (1 << (argreg+3));
897                  addInstr(env,
898                           mk_iMOVds_RR( argregs[argreg],
899                                         iselWordExpr_R(env, arg,
900							IEndianess) ));
901               } else { // Ity_I64 in 32-bit mode
902                  HReg rHi, rLo;
903                  if ((argreg%2) == 1)
904                                 // ppc32 ELF abi spec for passing LONG_LONG
905                     argreg++;   // XXX: odd argreg => even rN
906                  vassert(argreg < PPC_N_REGPARMS-1);
907                  iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
908                  argiregs |= (1 << (argreg+3));
909                  addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
910                  argiregs |= (1 << (argreg+3));
911                  addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
912               }
913            } else { // mode64
914               argiregs |= (1 << (argreg+3));
915               addInstr(env, mk_iMOVds_RR( argregs[argreg],
916                                           iselWordExpr_R(env, arg,
917                                                          IEndianess) ));
918            }
919            argreg++;
920         } /* if (arg == IRExprP__BBPR) */
921      }
922
923      /* Fast scheme only applies for unconditional calls.  Hence: */
924      cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
925
926   } else {
927
928      /* SLOW SCHEME; move via temporaries */
929      argreg = 0;
930
931      /* If we have a vector return type, allocate a place for it on
932         the stack and record its address.  Rather than figure out the
933         complexities of PPC{32,64} ELF ABI stack frame layout, simply
934         drop the SP by 1024 and allocate the return point in the
935         middle.  I think this should comfortably clear any ABI
936         mandated register save areas.  Note that it doesn't maintain
937         the backchain as it should, since we're not doing st{d,w}u to
938         adjust the SP, but .. that doesn't seem to be a big deal.
939         Since we're not expecting to have to unwind out of here. */
940      HReg r_vecRetAddr = INVALID_HREG;
941      if (retTy == Ity_V128) {
942         r_vecRetAddr = newVRegI(env);
943         sub_from_sp(env, 512);
944         addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
945         sub_from_sp(env, 512);
946      }
947      else if (retTy == Ity_V256) {
948         vassert(0); //ATC
949         r_vecRetAddr = newVRegI(env);
950         sub_from_sp(env, 512);
951         addInstr(env, mk_iMOVds_RR( r_vecRetAddr, StackFramePtr(mode64) ));
952         sub_from_sp(env, 512);
953      }
954
955      vassert(n_args >= 0 && n_args <= 8);
956      for (i = 0; i < n_args; i++) {
957         IRExpr* arg = args[i];
958         vassert(argreg < PPC_N_REGPARMS);
959         if (UNLIKELY(arg->tag == Iex_GSPTR)) {
960            tmpregs[argreg] = newVRegI(env);
961            addInstr(env, mk_iMOVds_RR( tmpregs[argreg],
962                                        GuestStatePtr(mode64) ));
963            nGSPTRs++;
964         }
965         else if (UNLIKELY(arg->tag == Iex_VECRET)) {
966            /* We stashed the address of the return slot earlier, so just
967               retrieve it now. */
968            vassert(!hregIsInvalid(r_vecRetAddr));
969            tmpregs[i] = r_vecRetAddr;
970            nVECRETs++;
971         }
972         else {
973            IRType ty = typeOfIRExpr(env->type_env, arg);
974            vassert(ty == Ity_I32 || ty == Ity_I64);
975            if (!mode64) {
976               if (ty == Ity_I32) {
977                  tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
978               } else { // Ity_I64 in 32-bit mode
979                  HReg rHi, rLo;
980                  if ((argreg%2) == 1)
981                                // ppc32 ELF abi spec for passing LONG_LONG
982                     argreg++;  // XXX: odd argreg => even rN
983                  vassert(argreg < PPC_N_REGPARMS-1);
984                  iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
985                  tmpregs[argreg++] = rHi;
986                  tmpregs[argreg]   = rLo;
987               }
988            } else { // mode64
989               tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
990            }
991         }
992         argreg++;
993      }
994
995      /* Now we can compute the condition.  We can't do it earlier
996         because the argument computations could trash the condition
997         codes.  Be a bit clever to handle the common case where the
998         guard is 1:Bit. */
999      cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
1000      if (guard) {
1001         if (guard->tag == Iex_Const
1002             && guard->Iex.Const.con->tag == Ico_U1
1003             && guard->Iex.Const.con->Ico.U1 == True) {
1004            /* unconditional -- do nothing */
1005         } else {
1006            cc = iselCondCode( env, guard, IEndianess );
1007         }
1008      }
1009
1010      /* Move the args to their final destinations. */
1011      for (i = 0; i < argreg; i++) {
1012         if (hregIsInvalid(tmpregs[i]))  // Skip invalid regs
1013            continue;
1014         /* None of these insns, including any spill code that might
1015            be generated, may alter the condition codes. */
1016         argiregs |= (1 << (i+3));
1017         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
1018      }
1019
1020   }
1021
1022   /* Do final checks, set the return values, and generate the call
1023      instruction proper. */
1024   if (retTy == Ity_V128 || retTy == Ity_V256) {
1025      vassert(nVECRETs == 1);
1026   } else {
1027      vassert(nVECRETs == 0);
1028   }
1029
1030   vassert(nGSPTRs == 0 || nGSPTRs == 1);
1031
1032   vassert(*stackAdjustAfterCall == 0);
1033   vassert(is_RetLoc_INVALID(*retloc));
1034   switch (retTy) {
1035      case Ity_INVALID:
1036         /* Function doesn't return a value. */
1037         *retloc = mk_RetLoc_simple(RLPri_None);
1038         break;
1039      case Ity_I64:
1040         *retloc = mk_RetLoc_simple(mode64 ? RLPri_Int : RLPri_2Int);
1041         break;
1042      case Ity_I32: case Ity_I16: case Ity_I8:
1043         *retloc = mk_RetLoc_simple(RLPri_Int);
1044         break;
1045      case Ity_V128:
1046         /* Result is 512 bytes up the stack, and after it has been
1047            retrieved, adjust SP upwards by 1024. */
1048         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 512);
1049         *stackAdjustAfterCall = 1024;
1050         break;
1051      case Ity_V256:
1052         vassert(0); // ATC
1053         /* Ditto */
1054         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 512);
1055         *stackAdjustAfterCall = 1024;
1056         break;
1057      default:
1058         /* IR can denote other possible return types, but we don't
1059            handle those here. */
1060         vassert(0);
1061   }
1062
1063   /* Finally, generate the call itself.  This needs the *retloc value
1064      set in the switch above, which is why it's at the end. */
1065
1066   Addr64 target = mode64 ? (Addr)cee->addr
1067                          : toUInt((Addr)(cee->addr));
1068   addInstr(env, PPCInstr_Call( cc, target, argiregs, *retloc ));
1069}
1070
1071
1072/*---------------------------------------------------------*/
1073/*--- ISEL: FP rounding mode helpers                    ---*/
1074/*---------------------------------------------------------*/
1075
1076///* Set FPU's rounding mode to the default */
1077//static
1078//void set_FPU_rounding_default ( ISelEnv* env )
1079//{
1080//   HReg fr_src = newVRegF(env);
1081//   HReg r_src  = newVRegI(env);
1082//
1083//   /* Default rounding mode = 0x0
1084//      Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
1085//       - so we can set the whole register at once (faster)
1086//      note: upper 32 bits ignored by FpLdFPSCR
1087//   */
1088//   addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
1089//   if (env->mode64) {
1090//      fr_src = mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
1091//   } else {
1092//      fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1093//   }
1094//   addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
1095//}
1096
1097/* Convert IR rounding mode to PPC encoding */
1098static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
1099{
1100   /*
1101   rounding mode                     | PPC  |  IR
1102   -----------------------------------------------
1103   to nearest, ties to even          | 000  | 000
1104   to zero                           | 001  | 011
1105   to +infinity                      | 010  | 010
1106   to -infinity                      | 011  | 001
1107   +++++ Below are the extended rounding modes for decimal floating point +++++
1108   to nearest, ties away from 0      | 100  | 100
1109   to nearest, ties toward 0         | 101  | 111
1110   to away from 0                    | 110  | 110
1111   to prepare for shorter precision  | 111  | 101
1112   */
1113   HReg r_rmPPC = newVRegI(env);
1114   HReg r_tmp1  = newVRegI(env);
1115   HReg r_tmp2  = newVRegI(env);
1116
1117   vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
1118
1119   // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
1120   //
1121   // slwi  tmp1,    r_rmIR, 1
1122   // xor   tmp1,    r_rmIR, tmp1
1123   // andi  r_rmPPC, tmp1, 3
1124
1125   addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1126                               r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
1127
1128   addInstr( env, PPCInstr_Alu( Palu_AND,
1129                                r_tmp2, r_tmp1, PPCRH_Imm( False, 3 ) ) );
1130
1131   addInstr( env, PPCInstr_Alu( Palu_XOR,
1132                                r_rmPPC, r_rmIR, PPCRH_Reg( r_tmp2 ) ) );
1133
1134   return r_rmPPC;
1135}
1136
1137
1138/* Set the FPU's rounding mode: 'mode' is an I32-typed expression
1139   denoting a value in the range 0 .. 7, indicating a round mode
1140   encoded as per type IRRoundingMode.  Set the PPC FPSCR to have the
1141   same rounding.  When the dfp_rm arg is True, set the decimal
1142   floating point rounding mode bits (29:31); otherwise, set the
1143   binary floating point rounding mode bits (62:63).
1144
1145   For speed & simplicity, we're setting the *entire* FPSCR here.
1146
1147   Setting the rounding mode is expensive.  So this function tries to
1148   avoid repeatedly setting the rounding mode to the same thing by
1149   first comparing 'mode' to the 'mode' tree supplied in the previous
1150   call to this function, if any.  (The previous value is stored in
1151   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
1152   env->previous_rm is also just 't', then the setting is skipped.
1153
1154   This is safe because of the SSA property of IR: an IR temporary can
1155   only be defined once and so will have the same value regardless of
1156   where it appears in the block.  Cool stuff, SSA.
1157
1158   A safety condition: all attempts to set the RM must be aware of
1159   this mechanism - by being routed through the functions here.
1160
1161   Of course this only helps if blocks where the RM is set more than
1162   once and it is set to the same value each time, *and* that value is
1163   held in the same IR temporary each time.  In order to assure the
1164   latter as much as possible, the IR optimiser takes care to do CSE
1165   on any block with any sign of floating point activity.
1166*/
1167static
1168void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm,
1169                              IREndness IEndianess )
1170{
1171   HReg fr_src = newVRegF(env);
1172   HReg r_src;
1173
1174   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
1175
1176   /* Do we need to do anything? */
1177   if (env->previous_rm
1178       && env->previous_rm->tag == Iex_RdTmp
1179       && mode->tag == Iex_RdTmp
1180       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
1181      /* no - setting it to what it was before.  */
1182      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
1183      return;
1184   }
1185
1186   /* No luck - we better set it, and remember what we set it to. */
1187   env->previous_rm = mode;
1188
1189   /* Only supporting the rounding-mode bits - the rest of FPSCR is
1190      0x0 - so we can set the whole register at once (faster). */
1191
1192   // Resolve rounding mode and convert to PPC representation
1193   r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode, IEndianess) );
1194
1195   // gpr -> fpr
1196   if (env->mode64) {
1197      if (dfp_rm) {
1198         HReg r_tmp1 = newVRegI( env );
1199         addInstr( env,
1200                   PPCInstr_Shft( Pshft_SHL, False/*64bit shift*/,
1201                                  r_tmp1, r_src, PPCRH_Imm( False, 32 ) ) );
1202         fr_src = mk_LoadR64toFPR( env, r_tmp1 );
1203      } else {
1204         fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
1205      }
1206   } else {
1207      if (dfp_rm) {
1208         HReg r_zero = newVRegI( env );
1209         addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
1210         fr_src = mk_LoadRR32toFPR( env, r_src, r_zero );
1211      } else {
1212         fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
1213      }
1214   }
1215
1216   // Move to FPSCR
1217   addInstr(env, PPCInstr_FpLdFPSCR( fr_src, dfp_rm ));
1218}
1219
1220static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode,
1221                                    IREndness IEndianess )
1222{
1223   _set_FPU_rounding_mode(env, mode, False, IEndianess);
1224}
1225
1226static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode,
1227                                        IREndness IEndianess )
1228{
1229   _set_FPU_rounding_mode(env, mode, True, IEndianess);
1230}
1231
1232static
1233Bool FPU_rounding_mode_isOdd (IRExpr* mode) {
1234   /* If the rounding mode is set to odd, the the expr must be a constant U8
1235    * value equal to 8.  Otherwise, it must be a bin op expressiong that
1236    * calculates the value.
1237    */
1238
1239   if (mode->tag != Iex_Const)
1240      return False;
1241
1242   vassert(mode->Iex.Const.con->tag == Ico_U32);
1243   vassert(mode->Iex.Const.con->Ico.U32 == 0x8);
1244   return True;
1245}
1246
1247/*---------------------------------------------------------*/
1248/*--- ISEL: vector helpers                              ---*/
1249/*---------------------------------------------------------*/
1250
1251/* Generate all-zeroes into a new vector register.
1252*/
1253static HReg generate_zeroes_V128 ( ISelEnv* env )
1254{
1255   HReg dst = newVRegV(env);
1256   addInstr(env, PPCInstr_AvBinary(Pav_XOR, dst, dst, dst));
1257   return dst;
1258}
1259
1260/* Generate all-ones into a new vector register.
1261*/
1262static HReg generate_ones_V128 ( ISelEnv* env )
1263{
1264   HReg dst = newVRegV(env);
1265   PPCVI5s * src = PPCVI5s_Imm(-1);
1266   addInstr(env, PPCInstr_AvSplat(8, dst, src));
1267   return dst;
1268}
1269
1270
1271/*
1272  Generates code for AvSplat
1273  - takes in IRExpr* of type 8|16|32
1274    returns vector reg of duplicated lanes of input
1275  - uses AvSplat(imm) for imms up to simm6.
1276    otherwise must use store reg & load vector
1277*/
1278static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e, IREndness IEndianess )
1279{
1280   HReg   r_src;
1281   HReg   dst = newVRegV(env);
1282   PPCRI* ri  = iselWordExpr_RI(env, e, IEndianess);
1283   IRType ty  = typeOfIRExpr(env->type_env,e);
1284   UInt   sz  = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
1285   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
1286
1287   /* special case: immediate */
1288   if (ri->tag == Pri_Imm) {
1289      Int simm32 = (Int)ri->Pri.Imm;
1290
1291      /* figure out if it's do-able with imm splats. */
1292      if (simm32 >= -32 && simm32 <= 31) {
1293         Char simm6 = (Char)simm32;
1294         if (simm6 > 15) {           /* 16:31 inclusive */
1295            HReg v1 = newVRegV(env);
1296            HReg v2 = newVRegV(env);
1297            addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1298            addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6-16)));
1299            addInstr(env,
1300               (sz== 8) ? PPCInstr_AvBin8x16(Pav_SUBU, dst, v2, v1) :
1301               (sz==16) ? PPCInstr_AvBin16x8(Pav_SUBU, dst, v2, v1)
1302                        : PPCInstr_AvBin32x4(Pav_SUBU, dst, v2, v1) );
1303            return dst;
1304         }
1305         if (simm6 < -16) {          /* -32:-17 inclusive */
1306            HReg v1 = newVRegV(env);
1307            HReg v2 = newVRegV(env);
1308            addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
1309            addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6+16)));
1310            addInstr(env,
1311               (sz== 8) ? PPCInstr_AvBin8x16(Pav_ADDU, dst, v2, v1) :
1312               (sz==16) ? PPCInstr_AvBin16x8(Pav_ADDU, dst, v2, v1)
1313                        : PPCInstr_AvBin32x4(Pav_ADDU, dst, v2, v1) );
1314            return dst;
1315         }
1316         /* simplest form:              -16:15 inclusive */
1317         addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Imm(simm6)));
1318         return dst;
1319      }
1320
1321      /* no luck; use the Slow way. */
1322      r_src = newVRegI(env);
1323      addInstr(env, PPCInstr_LI(r_src, (Long)simm32, env->mode64));
1324   }
1325   else {
1326      r_src = ri->Pri.Reg;
1327   }
1328
1329   {
1330      /* Store r_src multiple times (sz dependent); then load the dest vector. */
1331      HReg r_aligned16;
1332      PPCAMode *am_offset, *am_offset_zero;
1333
1334      sub_from_sp( env, 32 );     // Move SP down
1335      /* Get a 16-aligned address within our stack space */
1336      r_aligned16 = get_sp_aligned16( env );
1337
1338      Int i;
1339      Int stride = (sz == 8) ? 1 : (sz == 16) ? 2 : 4;
1340      UChar num_bytes_to_store = stride;
1341      am_offset_zero = PPCAMode_IR( 0, r_aligned16 );
1342      am_offset = am_offset_zero;
1343      for (i = 0; i < 16; i+=stride, am_offset = PPCAMode_IR( i, r_aligned16)) {
1344         addInstr(env, PPCInstr_Store( num_bytes_to_store, am_offset, r_src, env->mode64 ));
1345      }
1346
1347      /* Effectively splat the r_src value to dst */
1348      addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 16, dst, am_offset_zero ) );
1349      add_to_sp( env, 32 );       // Reset SP
1350
1351      return dst;
1352   }
1353}
1354
1355
1356/* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
1357static HReg isNan ( ISelEnv* env, HReg vSrc, IREndness IEndianess )
1358{
1359   HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
1360
1361   vassert(hregClass(vSrc) == HRcVec128);
1362
1363   zeros   = mk_AvDuplicateRI(env, mkU32(0), IEndianess);
1364   msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000), IEndianess);
1365   msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF), IEndianess);
1366   expt    = newVRegV(env);
1367   mnts    = newVRegV(env);
1368   vIsNan  = newVRegV(env);
1369
1370   /* 32bit float => sign(1) | exponent(8) | mantissa(23)
1371      nan => exponent all ones, mantissa > 0 */
1372
1373   addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
1374   addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
1375   addInstr(env, PPCInstr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
1376   addInstr(env, PPCInstr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
1377   addInstr(env, PPCInstr_AvBinary(Pav_AND, vIsNan, expt, mnts));
1378   return vIsNan;
1379}
1380
1381
1382/*---------------------------------------------------------*/
1383/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
1384/*---------------------------------------------------------*/
1385
1386/* Select insns for an integer-typed expression, and add them to the
1387   code list.  Return a reg holding the result.  This reg will be a
1388   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
1389   want to modify it, ask for a new vreg, copy it in there, and modify
1390   the copy.  The register allocator will do its best to map both
1391   vregs to the same real register, so the copies will often disappear
1392   later in the game.
1393
1394   This should handle expressions of 64, 32, 16 and 8-bit type.
1395   All results are returned in a (mode64 ? 64bit : 32bit) register.
1396   For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits
1397   are arbitrary, so you should mask or sign extend partial values
1398   if necessary.
1399*/
1400
1401static HReg iselWordExpr_R ( ISelEnv* env, const IRExpr* e,
1402                             IREndness IEndianess )
1403{
1404   HReg r = iselWordExpr_R_wrk(env, e, IEndianess);
1405   /* sanity checks ... */
1406#  if 0
1407   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1408#  endif
1409
1410   vassert(hregClass(r) == HRcGPR(env->mode64));
1411   vassert(hregIsVirtual(r));
1412   return r;
1413}
1414
1415/* DO NOT CALL THIS DIRECTLY ! */
1416static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
1417                                 IREndness IEndianess )
1418{
1419   Bool mode64 = env->mode64;
1420   MatchInfo mi;
1421   DECLARE_PATTERN(p_32to1_then_1Uto8);
1422
1423   IRType ty = typeOfIRExpr(env->type_env,e);
1424   vassert(ty == Ity_I8 || ty == Ity_I16 ||
1425           ty == Ity_I32 || ((ty == Ity_I64) && mode64));
1426
1427   switch (e->tag) {
1428
1429   /* --------- TEMP --------- */
1430   case Iex_RdTmp:
1431      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1432
1433   /* --------- LOAD --------- */
1434   case Iex_Load: {
1435      HReg      r_dst;
1436      PPCAMode* am_addr;
1437      if (e->Iex.Load.end != IEndianess)
1438         goto irreducible;
1439      r_dst   = newVRegI(env);
1440      am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/,
1441                                    IEndianess );
1442      addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
1443                                   r_dst, am_addr, mode64 ));
1444      return r_dst;
1445      /*NOTREACHED*/
1446   }
1447
1448   /* --------- BINARY OP --------- */
1449   case Iex_Binop: {
1450      PPCAluOp  aluOp;
1451      PPCShftOp shftOp;
1452
1453      /* Is it an addition or logical style op? */
1454      switch (e->Iex.Binop.op) {
1455      case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
1456         aluOp = Palu_ADD; break;
1457      case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
1458         aluOp = Palu_SUB; break;
1459      case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
1460         aluOp = Palu_AND; break;
1461      case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
1462         aluOp = Palu_OR; break;
1463      case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
1464         aluOp = Palu_XOR; break;
1465      default:
1466         aluOp = Palu_INVALID; break;
1467      }
1468      /* For commutative ops we assume any literal
1469         values are on the second operand. */
1470      if (aluOp != Palu_INVALID) {
1471         HReg   r_dst   = newVRegI(env);
1472         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1473         PPCRH* ri_srcR = NULL;
1474         /* get right arg into an RH, in the appropriate way */
1475         switch (aluOp) {
1476         case Palu_ADD: case Palu_SUB:
1477            ri_srcR = iselWordExpr_RH(env, True/*signed*/,
1478                                      e->Iex.Binop.arg2, IEndianess);
1479            break;
1480         case Palu_AND: case Palu_OR: case Palu_XOR:
1481            ri_srcR = iselWordExpr_RH(env, False/*signed*/,
1482                                      e->Iex.Binop.arg2, IEndianess);
1483            break;
1484         default:
1485            vpanic("iselWordExpr_R_wrk-aluOp-arg2");
1486         }
1487         addInstr(env, PPCInstr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
1488         return r_dst;
1489      }
1490
1491      /* a shift? */
1492      switch (e->Iex.Binop.op) {
1493      case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
1494         shftOp = Pshft_SHL; break;
1495      case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
1496         shftOp = Pshft_SHR; break;
1497      case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
1498         shftOp = Pshft_SAR; break;
1499      default:
1500         shftOp = Pshft_INVALID; break;
1501      }
1502      /* we assume any literal values are on the second operand. */
1503      if (shftOp != Pshft_INVALID) {
1504         HReg   r_dst   = newVRegI(env);
1505         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1506         PPCRH* ri_srcR = NULL;
1507         /* get right arg into an RH, in the appropriate way */
1508         switch (shftOp) {
1509         case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
1510            if (!mode64)
1511               ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2, IEndianess);
1512            else
1513               ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2, IEndianess);
1514            break;
1515         default:
1516            vpanic("iselIntExpr_R_wrk-shftOp-arg2");
1517         }
1518         /* widen the left arg if needed */
1519         if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
1520            if (ty == Ity_I8 || ty == Ity_I16) {
1521               PPCRH* amt = PPCRH_Imm(False,
1522                                      toUShort(ty == Ity_I8 ? 24 : 16));
1523               HReg   tmp = newVRegI(env);
1524               addInstr(env, PPCInstr_Shft(Pshft_SHL,
1525                                           True/*32bit shift*/,
1526                                           tmp, r_srcL, amt));
1527               addInstr(env, PPCInstr_Shft(shftOp,
1528                                           True/*32bit shift*/,
1529                                           tmp, tmp,    amt));
1530               r_srcL = tmp;
1531               vassert(0); /* AWAITING TEST CASE */
1532            }
1533         }
1534         /* Only 64 expressions need 64bit shifts,
1535            32bit shifts are fine for all others */
1536         if (ty == Ity_I64) {
1537            vassert(mode64);
1538            addInstr(env, PPCInstr_Shft(shftOp, False/*64bit shift*/,
1539                                        r_dst, r_srcL, ri_srcR));
1540         } else {
1541            addInstr(env, PPCInstr_Shft(shftOp, True/*32bit shift*/,
1542                                        r_dst, r_srcL, ri_srcR));
1543         }
1544         return r_dst;
1545      }
1546
1547      /* How about a div?
             32-bit divides: Iop_DivS32, Iop_DivU32 and their 'E' variants.
             Both operands are selected into registers and a single
             PPCInstr_Div targets a fresh integer vreg, which is returned. */
1548      if (e->Iex.Binop.op == Iop_DivS32 ||
1549          e->Iex.Binop.op == Iop_DivU32 ||
1550          e->Iex.Binop.op == Iop_DivS32E ||
1551          e->Iex.Binop.op == Iop_DivU32E) {
             /* syned is True for the two signed ops (DivS32, DivS32E). */
1552         Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS32) || (e->Iex.Binop.op == Iop_DivS32E));
1553         HReg r_dst  = newVRegI(env);
1554         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1555         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
             /* The first PPCInstr_Div argument is True only for the 'E' ops
                (presumably the "extended" divide forms -- TODO confirm
                against the PPCInstr_Div definition). */
1556         addInstr( env,
1557                      PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivU32E )
1558                                             || ( e->Iex.Binop.op == Iop_DivS32E ) ) ? True
1559                                                                                     : False,
1560                                    syned,
1561                                    True/*32bit div*/,
1562                                    r_dst,
1563                                    r_srcL,
1564                                    r_srcR ) );
1565         return r_dst;
1566      }
1567      if (e->Iex.Binop.op == Iop_DivS64 ||
1568          e->Iex.Binop.op == Iop_DivU64 || e->Iex.Binop.op == Iop_DivS64E
1569          || e->Iex.Binop.op == Iop_DivU64E ) {
             /* 64-bit divides: Iop_DivS64, Iop_DivU64 and their 'E' variants.
                Same shape as the 32-bit divide case, but only accepted on a
                64-bit host (see the vassert below). */
             /* syned is True for the two signed ops (DivS64, DivS64E). */
1570         Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS64) ||(e->Iex.Binop.op == Iop_DivS64E));
1571         HReg r_dst  = newVRegI(env);
1572         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1573         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1574         vassert(mode64);
             /* First argument is True only for the 'E' ops; the third
                argument (False) selects the 64-bit divide. */
1575         addInstr( env,
1576                      PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivS64E )
1577                                             || ( e->Iex.Binop.op
1578                                                      == Iop_DivU64E ) ) ? True
1579                                                                         : False,
1580                                    syned,
1581                                    False/*64bit div*/,
1582                                    r_dst,
1583                                    r_srcL,
1584                                    r_srcR ) );
1585         return r_dst;
1586      }
1587
1588      /* No? Anyone for a mul?
             Iop_Mul32 / Iop_Mul64: both operands go into registers and one
             PPCInstr_MulL (second argument False, i.e. the form annotated
             "lo32") writes a fresh integer vreg, which is returned. */
1589      if (e->Iex.Binop.op == Iop_Mul32
1590          || e->Iex.Binop.op == Iop_Mul64) {
             /* syned is fixed at False for both ops. */
1591         Bool syned       = False;
             /* sz32 is True for Iop_Mul32 and False for Iop_Mul64. */
1592         Bool sz32        = (e->Iex.Binop.op != Iop_Mul64);
1593         HReg r_dst       = newVRegI(env);
1594         HReg r_srcL      = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1595         HReg r_srcR      = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
             /* NOTE(review): unlike the 64-bit divide case, this block has no
                vassert(mode64) for Iop_Mul64. */
1596         addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
1597                                     r_dst, r_srcL, r_srcR));
1598         return r_dst;
1599      }
1600
1601      /* 32 x 32 -> 64 multiply (64-bit hosts only).
             The low and high 32-bit halves of the product are computed by
             two separate PPCInstr_MulL instructions and combined as
                r_dst = (tHi << 32) | (tLo & 0xFFFFFFFF)
             The mask on tLo matters: on a 64-bit host the low-word multiply
             (mullw) delivers a full signed 64-bit product, so bits 63..32 of
             tLo are not zero in general and would otherwise corrupt the
             high half for Iop_MullU32 when an operand has bit 31 set.  The
             Iop_32HLto64 case masks its low half the same way. */
1602      if (mode64
1603          && (e->Iex.Binop.op == Iop_MullU32
1604              || e->Iex.Binop.op == Iop_MullS32)) {
1605         HReg tLo    = newVRegI(env);
1606         HReg tHi    = newVRegI(env);
1607         HReg r_dst  = newVRegI(env);
             /* syned only affects the high-half multiply. */
1608         Bool syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
1609         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1610         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
             /* Scratch vregs for the 32-bit mask and the masked low half. */
             HReg msk    = newVRegI(env);
             HReg tLoMsk = newVRegI(env);
1611         addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
1612                                     False/*lo32*/, True/*32bit mul*/,
1613                                     tLo, r_srcL, r_srcR));
1614         addInstr(env, PPCInstr_MulL(syned,
1615                                     True/*hi32*/, True/*32bit mul*/,
1616                                     tHi, r_srcL, r_srcR));
             /* r_dst = tHi << 32; whatever was above bit 31 of tHi is
                shifted out. */
1617         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1618                                     r_dst, tHi, PPCRH_Imm(False,32)));
             /* tLoMsk = tLo & 0xFFFFFFFF */
             addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
             addInstr(env, PPCInstr_Alu(Palu_AND, tLoMsk, tLo,
                                        PPCRH_Reg(msk)));
             /* r_dst |= tLoMsk */
1619         addInstr(env, PPCInstr_Alu(Palu_OR,
1620                                    r_dst, r_dst, PPCRH_Reg(tLoMsk)));
1621         return r_dst;
1622      }
1623
1624      /* El-mutanto 3-way compare?
             Iop_CmpORD32S / Iop_CmpORD32U: compare arg1 (in a register)
             with arg2 (register or immediate) as 32-bit values, targeting
             condition-register field 7.  The result is then fetched into dst
             with PPCInstr_MfCR and reduced to bits 3..1 by the 7<<1 mask
             (presumably the LT/GT/EQ bits of CR7 -- TODO confirm). */
1625      if (e->Iex.Binop.op == Iop_CmpORD32S
1626          || e->Iex.Binop.op == Iop_CmpORD32U) {
             /* syned selects a signed compare, and is also passed on to
                iselWordExpr_RH when selecting arg2. */
1627         Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
1628         HReg   dst   = newVRegI(env);
1629         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1630         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1631                                        IEndianess);
1632         addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
1633                                    7/*cr*/, srcL, srcR));
1634         addInstr(env, PPCInstr_MfCR(dst));
             /* dst &= 0xE */
1635         addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1636                                    PPCRH_Imm(False,7<<1)));
1637         return dst;
1638      }
1639
1640      if (e->Iex.Binop.op == Iop_CmpORD64S
1641          || e->Iex.Binop.op == Iop_CmpORD64U) {
             /* Iop_CmpORD64S / Iop_CmpORD64U: 64-bit version of the 3-way
                compare.  Same sequence as the 32-bit case (Cmp into CR field
                7, MfCR, mask with 7<<1) but with a 64-bit compare, and only
                accepted on a 64-bit host. */
1642         Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
1643         HReg   dst   = newVRegI(env);
1644         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1645         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
1646                                        IEndianess);
1647         vassert(mode64);
1648         addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
1649                                    7/*cr*/, srcL, srcR));
1650         addInstr(env, PPCInstr_MfCR(dst));
             /* dst &= 0xE */
1651         addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
1652                                    PPCRH_Imm(False,7<<1)));
1653         return dst;
1654      }
1655
1656      if (e->Iex.Binop.op == Iop_Max32U) {
             /* Unsigned 32-bit maximum: start with rdst = arg1, compare rdst
                against arg2 (unsigned, 32-bit, CR field 7), then conditionally
                overwrite rdst with arg2. */
1657         HReg        r1   = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1658         HReg        r2   = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1659         HReg        rdst = newVRegI(env);
             /* Condition (Pct_TRUE, Pcf_7LT): presumably "CR7.LT is set",
                i.e. arg1 < arg2 after the compare below -- TODO confirm. */
1660         PPCCondCode cc   = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
1661         addInstr(env, mk_iMOVds_RR(rdst, r1));
1662         addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
1663                                    7/*cr*/, rdst, PPCRH_Reg(r2)));
1664         addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
1665         return rdst;
1666      }
1667
1668      if (e->Iex.Binop.op == Iop_32HLto64) {
             /* Build a 64-bit value from a high word (arg1) and a low word
                (arg2).  Only accepted on a 64-bit host. */
1669         HReg   r_Hi  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
1670         HReg   r_Lo  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
1671         HReg   r_Tmp = newVRegI(env);
1672         HReg   r_dst = newVRegI(env);
1673         HReg   msk   = newVRegI(env);
1674         vassert(mode64);
1675         /* r_dst = OR( r_Hi<<32, r_Lo & 0xFFFFFFFF ) */
1676         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1677                                     r_dst, r_Hi, PPCRH_Imm(False,32)));
             /* The low word is masked to 32 bits before being merged in. */
1678         addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
1679         addInstr(env, PPCInstr_Alu( Palu_AND, r_Tmp, r_Lo,
1680                                     PPCRH_Reg(msk) ));
1681         addInstr(env, PPCInstr_Alu( Palu_OR, r_dst, r_dst,
1682                                     PPCRH_Reg(r_Tmp) ));
1683         return r_dst;
1684      }
1685
1686      if ((e->Iex.Binop.op == Iop_CmpF64) ||
1687          (e->Iex.Binop.op == Iop_CmpD64) ||
1688          (e->Iex.Binop.op == Iop_CmpD128)) {
             /* Floating-point / decimal-FP compare.  A type-specific compare
                instruction leaves a PPC-style result in r_ccPPC; the rest of
                the block rearranges its bits into the IR encoding shown in
                the table below and returns that in r_ccIR. */
1689         HReg fr_srcL;
             /* The *_lo registers are only used for Iop_CmpD128. */
1690         HReg fr_srcL_lo;
1691         HReg fr_srcR;
1692         HReg fr_srcR_lo;
1693
1694         HReg r_ccPPC   = newVRegI(env);
1695         HReg r_ccIR    = newVRegI(env);
1696         HReg r_ccIR_b0 = newVRegI(env);
1697         HReg r_ccIR_b2 = newVRegI(env);
1698         HReg r_ccIR_b6 = newVRegI(env);
1699
1700         if (e->Iex.Binop.op == Iop_CmpF64) {
1701            fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
1702            fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1703            addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
1704
1705         } else if (e->Iex.Binop.op == Iop_CmpD64) {
1706            fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
1707            fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1708            addInstr(env, PPCInstr_Dfp64Cmp(r_ccPPC, fr_srcL, fr_srcR));
1709
1710         } else {    //  e->Iex.Binop.op == Iop_CmpD128
1711            iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1,
1712                           IEndianess);
1713            iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2,
1714                           IEndianess);
1715            addInstr(env, PPCInstr_Dfp128Cmp(r_ccPPC, fr_srcL, fr_srcL_lo,
1716                                             fr_srcR, fr_srcR_lo));
1717         }
1718
1719         /* Map compare result from PPC to IR,
1720            conforming to CmpF64 definition. */
1721         /*
1722           FP cmp result | PPC | IR
1723           --------------------------
1724           UN            | 0x1 | 0x45
1725           EQ            | 0x2 | 0x40
1726           GT            | 0x4 | 0x00
1727           LT            | 0x8 | 0x01
1728         */
1729
1730         // r_ccIR_b0 = r_ccPPC[0] | r_ccPPC[3]
             // computed as ((r_ccPPC >> 3) | r_ccPPC) & 1
1731         addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1732                                     r_ccIR_b0, r_ccPPC,
1733                                     PPCRH_Imm(False,0x3)));
1734         addInstr(env, PPCInstr_Alu(Palu_OR,  r_ccIR_b0,
1735                                    r_ccPPC,   PPCRH_Reg(r_ccIR_b0)));
1736         addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b0,
1737                                    r_ccIR_b0, PPCRH_Imm(False,0x1)));
1738
1739         // r_ccIR_b2 = r_ccPPC[0]
             // computed as (r_ccPPC << 2) & 4, i.e. already in bit position 2
1740         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1741                                     r_ccIR_b2, r_ccPPC,
1742                                     PPCRH_Imm(False,0x2)));
1743         addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b2,
1744                                    r_ccIR_b2, PPCRH_Imm(False,0x4)));
1745
1746         // r_ccIR_b6 = r_ccPPC[0] | r_ccPPC[1]
             // computed as (((r_ccPPC >> 1) | r_ccPPC) << 6) & 0x40
1747         addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
1748                                     r_ccIR_b6, r_ccPPC,
1749                                     PPCRH_Imm(False,0x1)));
1750         addInstr(env, PPCInstr_Alu(Palu_OR,  r_ccIR_b6,
1751                                    r_ccPPC, PPCRH_Reg(r_ccIR_b6)));
1752         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1753                                     r_ccIR_b6, r_ccIR_b6,
1754                                     PPCRH_Imm(False,0x6)));
1755         addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b6,
1756                                    r_ccIR_b6, PPCRH_Imm(False,0x40)));
1757
1758         // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
1759         addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1760                                    r_ccIR_b0, PPCRH_Reg(r_ccIR_b2)));
1761         addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
1762                                    r_ccIR,    PPCRH_Reg(r_ccIR_b6)));
1763         return r_ccIR;
1764      }
1765
1766      if ( e->Iex.Binop.op == Iop_F64toI32S ||
1767               e->Iex.Binop.op == Iop_F64toI32U ) {
             /* F64 -> I32 conversion.  arg1 supplies the rounding mode and
                arg2 the F64 value.  The converted value is produced in an FP
                register (ftmp), written to a 16-byte scratch area opened
                below the stack pointer, and read back as a 4-byte integer
                into idst. */
1768         /* This works in both mode64 and mode32. */
1769         HReg      r1      = StackFramePtr(env->mode64);
1770         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1771         HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
1772         HReg      ftmp    = newVRegF(env);
1773         HReg      idst    = newVRegI(env);
1774
1775         /* Set host rounding mode */
1776         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1777
1778         sub_from_sp( env, 16 );
             /* Third argument is True for the signed op, False for the
                unsigned one. */
1779         addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
1780                                       e->Iex.Binop.op == Iop_F64toI32S ? True/*syned*/
1781                                                                     : False,
1782                                       True/*flt64*/,
1783                                       ftmp, fsrc));
1784         addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
1785         addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
1786
1787         /* in 64-bit mode we need to sign-widen idst.  Note this is
                applied for both the signed and the unsigned conversion. */
1788         if (mode64)
1789            addInstr(env, PPCInstr_Unary(Pun_EXTSW, idst, idst));
1790
1791         add_to_sp( env, 16 );
1792
1793         ///* Restore default FPU rounding. */
1794         //set_FPU_rounding_default( env );
1795         return idst;
1796      }
1797
1798      if (e->Iex.Binop.op == Iop_F64toI64S || e->Iex.Binop.op == Iop_F64toI64U ) {
             /* F64 -> I64 conversion, handled here only on a 64-bit host.
                When mode64 is False nothing is emitted and control falls
                through to the cases that follow. */
1799         if (mode64) {
1800            HReg      r1      = StackFramePtr(env->mode64);
1801            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1802            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2,
1803                                            IEndianess);
1804            HReg      idst    = newVRegI(env);
1805            HReg      ftmp    = newVRegF(env);
1806
1807            /* Set host rounding mode */
1808            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1809
                /* Convert into ftmp, then move the 8-byte result to idst via
                   a 16-byte scratch area on the stack (FP store followed by
                   an integer load from the same address). */
1810            sub_from_sp( env, 16 );
1811            addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
1812                                          ( e->Iex.Binop.op == Iop_F64toI64S ) ? True
1813                                                                            : False,
1814                                          True, ftmp, fsrc));
1815            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1816            addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1817            add_to_sp( env, 16 );
1818
1819            ///* Restore default FPU rounding. */
1820            //set_FPU_rounding_default( env );
1821            return idst;
1822         }
1823      }
1824
1825      if (e->Iex.Binop.op == Iop_D64toI64S ) {
             /* D64 (decimal FP) -> I64 conversion.  arg1 supplies the DFP
                rounding mode and arg2 the D64 value.  The Pfp_DCTFIX result
                lands in an FP register and is moved to an integer register
                through a 16-byte scratch area on the stack.
                NOTE(review): there is no mode64 guard in this block although
                an 8-byte integer load is issued. */
1826         HReg      r1      = StackFramePtr(env->mode64);
1827         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
1828         HReg      fr_src  = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
1829         HReg      idst    = newVRegI(env);
1830         HReg      ftmp    = newVRegF(env);
1831
1832         /* Set host rounding mode */
1833         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1834         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, ftmp, fr_src));
1835         sub_from_sp( env, 16 );
1836         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1837         addInstr(env, PPCInstr_Load(8, idst, zero_r1, mode64));
1838
1839         add_to_sp( env, 16 );
1840
1841         ///* Restore default FPU rounding. */
1842         //set_FPU_rounding_default( env );
1843         return idst;
1844      }
1845
1846      if (e->Iex.Binop.op == Iop_D128toI64S ) {
             /* D128 (decimal FP) -> I64 conversion.  arg1 supplies the DFP
                rounding mode and arg2 the D128 value, which is selected as a
                hi/lo pair of FP registers.  The Pfp_DCTFIXQ result is moved
                to an integer register through the stack. */
1847         PPCFpOp fpop = Pfp_DCTFIXQ;
             /* r_srcHi / r_srcLo are passed by address to iselDfp128Expr
                below (presumably overwritten there -- TODO confirm). */
1848         HReg r_srcHi = newVRegF(env);
1849         HReg r_srcLo = newVRegF(env);
1850         HReg idst    = newVRegI(env);
1851         HReg ftmp    = newVRegF(env);
1852         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
1853
1854         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
1855         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
1856                        IEndianess);
1857         addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
1858
1859         // put the D64 result into an integer register
1860         sub_from_sp( env, 16 );
1861         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
1862         addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
1863         add_to_sp( env, 16 );
1864         return idst;
1865      }
1866      break;
1867   }
1868
1869   /* --------- UNARY OP --------- */
1870   case Iex_Unop: {
1871      IROp op_unop = e->Iex.Unop.op;
1872
1873      /* 1Uto8(32to1(expr32))
             Special case: the two conversions collapse into a single
             "r_dst = expr32 & 1". */
1874      DEFINE_PATTERN(p_32to1_then_1Uto8,
1875                     unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1876      if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
             /* bindee[0] is the expression bound by bind(0) in the pattern. */
1877         const IRExpr* expr32 = mi.bindee[0];
1878         HReg r_dst = newVRegI(env);
1879         HReg r_src = iselWordExpr_R(env, expr32, IEndianess);
1880         addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
1881                                    r_src, PPCRH_Imm(False,1)));
1882         return r_dst;
1883      }
1884
1885      /* 16Uto32(LD:I16(expr32)), where the load uses IEndianess.
             Special case: the widening is folded into one 2-byte load whose
             result is returned directly, with no separate masking step. */
1886      {
1887         DECLARE_PATTERN(p_LDbe16_then_16Uto32);
1888         DEFINE_PATTERN(p_LDbe16_then_16Uto32,
1889                        unop(Iop_16Uto32,
1890                             IRExpr_Load(IEndianess,Ity_I16,bind(0))) );
1891         if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
1892            HReg r_dst = newVRegI(env);
                /* bindee[0] is the load's address expression. */
1893            PPCAMode* amode
1894               = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/,
1895                                     IEndianess );
1896            addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
1897            return r_dst;
1898         }
1899      }
1900
1901      switch (op_unop) {
1902      case Iop_8Uto16:
1903      case Iop_8Uto32:
1904      case Iop_8Uto64:
1905      case Iop_16Uto32:
1906      case Iop_16Uto64: {
             /* Zero-widening from 8 or 16 bits: r_dst = r_src & mask. */
1907         HReg   r_dst = newVRegI(env);
1908         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
             /* mask is 0xFFFF for the 16-bit sources, 0xFF for the 8-bit
                sources. */
1909         UShort mask  = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
1910                                 op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
1911         addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
1912                                    PPCRH_Imm(False,mask)));
1913         return r_dst;
1914      }
1915      case Iop_32Uto64: {
             /* Zero-widening 32 -> 64 (64-bit host only): 64-bit shift left
                by 32 followed by a 64-bit Pshft_SHR by 32. */
1916         HReg r_dst = newVRegI(env);
1917         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1918         vassert(mode64);
1919         addInstr(env,
1920                  PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1921                                r_dst, r_src, PPCRH_Imm(False,32)));
1922         addInstr(env,
1923                  PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1924                                r_dst, r_dst, PPCRH_Imm(False,32)));
1925         return r_dst;
1926      }
1927      case Iop_8Sto16:
1928      case Iop_8Sto32:
1929      case Iop_16Sto32: {
             /* Sign-widening within 32 bits: 32-bit shift left by amt, then
                32-bit Pshft_SAR by the same amount. */
1930         HReg   r_dst = newVRegI(env);
1931         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
             /* amt is 16 for a 16-bit source and 24 for an 8-bit source. */
1932         UShort amt   = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
1933         addInstr(env,
1934                  PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
1935                                r_dst, r_src, PPCRH_Imm(False,amt)));
1936         addInstr(env,
1937                  PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1938                                r_dst, r_dst, PPCRH_Imm(False,amt)));
1939         return r_dst;
1940      }
1941      case Iop_8Sto64:
1942      case Iop_16Sto64: {
             /* Sign-widening to 64 bits (64-bit host only): 64-bit shift left
                by amt, then 64-bit Pshft_SAR by the same amount. */
1943         HReg   r_dst = newVRegI(env);
1944         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
             /* amt is 56 for an 8-bit source and 48 for a 16-bit source. */
1945         UShort amt   = toUShort(op_unop==Iop_8Sto64  ? 56 : 48);
1946         vassert(mode64);
1947         addInstr(env,
1948                  PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
1949                                r_dst, r_src, PPCRH_Imm(False,amt)));
1950         addInstr(env,
1951                  PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
1952                                r_dst, r_dst, PPCRH_Imm(False,amt)));
1953         return r_dst;
1954      }
1955      case Iop_32Sto64: {
             /* Sign-widening 32 -> 64 (64-bit host only), done with a single
                32-bit arithmetic shift right by zero. */
1956         HReg   r_dst = newVRegI(env);
1957         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1958	 vassert(mode64);
1959         /* According to the IBM docs, in 64 bit mode, srawi r,r,0
1960            sign extends the lower 32 bits into the upper 32 bits. */
1961         addInstr(env,
1962                  PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
1963                                r_dst, r_src, PPCRH_Imm(False,0)));
1964         return r_dst;
1965      }
1966      case Iop_Not8:
1967      case Iop_Not16:
1968      case Iop_Not32:
1969      case Iop_Not64: {
             /* Bitwise NOT at any width: one Pun_NOT on the whole register.
                Iop_Not64 is only accepted on a 64-bit host. */
1970         if (op_unop == Iop_Not64) vassert(mode64);
1971         HReg r_dst = newVRegI(env);
1972         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1973         addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
1974         return r_dst;
1975      }
1976      case Iop_64HIto32: {
             /* High 32 bits of a 64-bit value.  On a 32-bit host the value is
                selected as a register pair and the high register returned;
                on a 64-bit host it is a 64-bit Pshft_SHR by 32. */
1977         if (!mode64) {
1978            HReg rHi, rLo;
1979            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1980            return rHi; /* and abandon rLo .. poor wee thing :-) */
1981         } else {
1982            HReg   r_dst = newVRegI(env);
1983            HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1984            addInstr(env,
1985                     PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
1986                                   r_dst, r_src, PPCRH_Imm(False,32)));
1987            return r_dst;
1988         }
1989      }
1990      case Iop_64to32: {
             /* Low 32 bits of a 64-bit value.  On a 32-bit host the value is
                selected as a register pair and the low register returned;
                on a 64-bit host the source register is returned unchanged. */
1991         if (!mode64) {
1992            HReg rHi, rLo;
1993            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
1994            return rLo; /* rHi is simply discarded */
1995         } else {
1996            /* This is a no-op. */
1997            return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
1998         }
1999      }
2000      case Iop_64to16: {
             /* Narrowing 64 -> 16: on a 64-bit host the source register is
                returned unchanged; on a 32-bit host the case is left
                unhandled and control leaves the switch. */
2001         if (mode64) { /* This is a no-op. */
2002            return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2003         }
2004         break; /* evidently not used in 32-bit mode */
2005      }
2006      case Iop_16HIto8:
2007      case Iop_32HIto16: {
             /* Upper half of a 16- or 32-bit value: 32-bit Pshft_SHR by 8
                (16HIto8) or by 16 (32HIto16).  No masking is applied after
                the shift. */
2008         HReg   r_dst = newVRegI(env);
2009         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2010         UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
2011         addInstr(env,
2012                  PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
2013                                r_dst, r_src, PPCRH_Imm(False,shift)));
2014         return r_dst;
2015      }
2016      case Iop_128HIto64:
             /* High 64 bits of a 128-bit value, 64-bit host only: the value
                is selected as a register pair and the high register
                returned.  On a 32-bit host the case is left unhandled. */
2017         if (mode64) {
2018            HReg rHi, rLo;
2019            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2020            return rHi; /* and abandon rLo .. poor wee thing :-) */
2021         }
2022         break;
2023      case Iop_128to64:
             /* Low 64 bits of a 128-bit value, 64-bit host only: the value
                is selected as a register pair and the low register returned.
                On a 32-bit host the case is left unhandled. */
2024         if (mode64) {
2025            HReg rHi, rLo;
2026            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
2027            return rLo; /* rHi is simply discarded */
2028         }
2029         break;
2030      case Iop_1Uto64:
2031      case Iop_1Uto32:
2032      case Iop_1Uto8:
             /* Zero-widening from a 1-bit value: the argument is selected as
                a condition code and PPCInstr_Set materialises it in r_dst.
                Iop_1Uto64 is handled only on a 64-bit host; otherwise it is
                left unhandled. */
2033         if ((op_unop != Iop_1Uto64) || mode64) {
2034            HReg        r_dst = newVRegI(env);
2035            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2036            addInstr(env, PPCInstr_Set(cond,r_dst));
2037            return r_dst;
2038         }
2039         break;
2040      case Iop_1Sto8:
2041      case Iop_1Sto16:
2042      case Iop_1Sto32: {
             /* Sign-widening from a 1-bit value: materialise the condition
                with PPCInstr_Set, then 32-bit shift left by 31 and 32-bit
                Pshft_SAR by 31 to replicate the bit. */
2043         /* could do better than this, but for now ... */
2044         HReg        r_dst = newVRegI(env);
2045         PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2046         addInstr(env, PPCInstr_Set(cond,r_dst));
2047         addInstr(env,
2048                  PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
2049                                r_dst, r_dst, PPCRH_Imm(False,31)));
2050         addInstr(env,
2051                  PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2052                                r_dst, r_dst, PPCRH_Imm(False,31)));
2053         return r_dst;
2054      }
2055      case Iop_1Sto64:
             /* 64-bit version of the 1-bit sign-widening: Set, then 64-bit
                shift left by 63 and 64-bit Pshft_SAR by 63.  Handled only on
                a 64-bit host; otherwise left unhandled. */
2056         if (mode64) {
2057            /* could do better than this, but for now ... */
2058            HReg        r_dst = newVRegI(env);
2059            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2060            addInstr(env, PPCInstr_Set(cond,r_dst));
2061            addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
2062                                        r_dst, r_dst, PPCRH_Imm(False,63)));
2063            addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2064                                        r_dst, r_dst, PPCRH_Imm(False,63)));
2065            return r_dst;
2066         }
2067         break;
2068      case Iop_Clz32:
2069      case Iop_Clz64: {
             /* Count leading zeroes with a single unary instruction:
                Pun_CLZ32 for Iop_Clz32, Pun_CLZ64 for Iop_Clz64. */
2070         HReg r_src, r_dst;
2071         PPCUnaryOp op_clz = (op_unop == Iop_Clz32) ? Pun_CLZ32 :
2072                                                      Pun_CLZ64;
             /* The 64-bit op cannot be selected on a 32-bit host. */
2073         if (op_unop == Iop_Clz64 && !mode64)
2074            goto irreducible;
2075         /* Count leading zeroes. */
2076         r_dst = newVRegI(env);
2077         r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2078         addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
2079         return r_dst;
2080      }
2081
2082      case Iop_Ctz32:
2083      case Iop_Ctz64: {
             /* Count trailing zeroes with a single unary instruction:
                Pun_CTZ32 for Iop_Ctz32, Pun_CTZ64 for Iop_Ctz64. */
2084         HReg r_src, r_dst;
             /* NOTE(review): despite its name, op_clz holds a count-
                trailing-zeroes opcode here. */
2085         PPCUnaryOp op_clz = (op_unop == Iop_Ctz32) ? Pun_CTZ32 :
2086                                                      Pun_CTZ64;
             /* The 64-bit op cannot be selected on a 32-bit host. */
2087         if (op_unop == Iop_Ctz64 && !mode64)
2088            goto irreducible;
2089         /* Count trailing zeroes. */
2090         r_dst = newVRegI(env);
2091         r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2092         addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
2093         return r_dst;
2094      }
2095
2096      case Iop_Left8:
2097      case Iop_Left16:
2098      case Iop_Left32:
2099      case Iop_Left64: {
             /* Iop_LeftN: r_dst = r_src | -r_src, done on the whole register
                regardless of N (negate, then OR with the source). */
2100         HReg r_src, r_dst;
             /* The 64-bit op cannot be selected on a 32-bit host. */
2101         if (op_unop == Iop_Left64 && !mode64)
2102            goto irreducible;
2103         r_dst = newVRegI(env);
2104         r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2105         addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2106         addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2107         return r_dst;
2108      }
2109
2110      case Iop_CmpwNEZ32: {
             /* Iop_CmpwNEZ32: r_dst = (r_src | -r_src) >>s 31, using a 32-bit
                arithmetic shift, so the sign bit of (src | -src) is
                replicated across the word. */
2111         HReg r_dst = newVRegI(env);
2112         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2113         addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2114         addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2115         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
2116                                     r_dst, r_dst, PPCRH_Imm(False, 31)));
2117         return r_dst;
2118      }
2119
2120      case Iop_CmpwNEZ64: {
             /* Iop_CmpwNEZ64: r_dst = (r_src | -r_src) >>s 63, using a 64-bit
                arithmetic shift.  64-bit host only. */
2121         HReg r_dst = newVRegI(env);
2122         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
             /* NOTE(review): the mode64 check comes after the operand has
                already been selected into r_src. */
2123         if (!mode64) goto irreducible;
2124         addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
2125         addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
2126         addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
2127                                     r_dst, r_dst, PPCRH_Imm(False, 63)));
2128         return r_dst;
2129      }
2130
2131      case Iop_V128to32: {
             /* Extract 32 bits from a 128-bit vector by storing the whole
                vector to a 16-byte-aligned scratch slot on the stack and
                loading 4 bytes back.  The load offset is 0 for Iend_LE and
                12 otherwise. */
2132         HReg        r_aligned16;
2133         HReg        dst  = newVRegI(env);
2134         HReg        vec  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2135         PPCAMode *am_off0, *am_off_word0;
2136         sub_from_sp( env, 32 );     // Move SP down 32 bytes
2137
2138         // get a quadword aligned address within our stack space
2139         r_aligned16 = get_sp_aligned16( env );
2140         am_off0  = PPCAMode_IR( 0, r_aligned16 );
2141
2142         /* Note that the store below (done via PPCInstr_AvLdSt) uses
2143          * stvx, which stores the vector in proper LE format,
2144          * with byte zero (far right byte of the register in LE format)
2145          * stored at the lowest memory address.  Therefore, to obtain
2146          * integer word zero, we need to use that lowest memory address
2147          * as the base for the load.
2148          */
2149         if (IEndianess == Iend_LE)
2150            am_off_word0 = am_off0;
2151         else
2152            am_off_word0 = PPCAMode_IR( 12,r_aligned16 );
2153
2154         // store vec, load low word to dst
2155         addInstr(env,
2156                  PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
2157         addInstr(env,
2158                  PPCInstr_Load( 4, dst, am_off_word0, mode64 ));
2159
2160         add_to_sp( env, 32 );       // Reset SP
2161         return dst;
2162      }
2163
2164      case Iop_V128to64:
2165      case Iop_V128HIto64:
             /* Extract the low (V128to64) or high (V128HIto64) 64 bits of a
                128-bit vector, 64-bit host only: the vector is stored to a
                16-byte-aligned scratch slot on the stack and 8 bytes are
                loaded back from offset 0 or 8, depending on which half is
                wanted and on IEndianess.  On a 32-bit host the case is left
                unhandled. */
2166         if (mode64) {
2167            HReg     r_aligned16;
2168            HReg     dst = newVRegI(env);
2169            HReg     vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2170            PPCAMode *am_off0, *am_off8, *am_off_arg;
2171            sub_from_sp( env, 32 );     // Move SP down 32 bytes
2172
2173            // get a quadword aligned address within our stack space
2174            r_aligned16 = get_sp_aligned16( env );
2175            am_off0 = PPCAMode_IR( 0, r_aligned16 );
2176            am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
2177
2178            // store vec, then load its low or high 8 bytes into dst
2179            addInstr(env,
2180                     PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
                /* Iend_LE: high half at offset 8, low half at offset 0.
                   Otherwise: high half at offset 0, low half at offset 8. */
2181            if (IEndianess == Iend_LE) {
2182               if (op_unop == Iop_V128HIto64)
2183                  am_off_arg = am_off8;
2184               else
2185                  am_off_arg = am_off0;
2186            } else {
2187               if (op_unop == Iop_V128HIto64)
2188                  am_off_arg = am_off0;
2189               else
2190                  am_off_arg = am_off8;
2191            }
2192            addInstr(env,
2193                     PPCInstr_Load(
2194                        8, dst,
2195                        am_off_arg,
2196                        mode64 ));
2197
2198            add_to_sp( env, 32 );       // Reset SP
2199            return dst;
2200         }
2201         break;
2202      case Iop_16to8:
2203      case Iop_32to8:
2204      case Iop_32to16:
2205      case Iop_64to8:
2206         /* These are no-ops: the register holding the wider value is
                returned as-is, with no masking of the upper bits. */
2207         return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2208
2209      /* ReinterpF64asI64(e) */
2210      /* Given an IEEE754 double, produce an I64 with the same bit
2211         pattern.  Handled only on a 64-bit host; the value is moved
             from an FP register to an integer register through a 16-byte
             scratch area on the stack. */
2212      case Iop_ReinterpF64asI64:
2213         if (mode64) {
2214            PPCAMode *am_addr;
2215            HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
2216            HReg r_dst  = newVRegI(env);
2217
2218            sub_from_sp( env, 16 );     // Move SP down 16 bytes
2219            am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2220
2221            // store as F64
2222            addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2223                                           fr_src, am_addr ));
2224            // load as Ity_I64
2225            addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2226
2227            add_to_sp( env, 16 );       // Reset SP
2228            return r_dst;
2229         }
2230         break;
2231
2232      /* ReinterpF32asI32(e) */
2233      /* Given an IEEE754 float, produce an I32 with the same bit
2234         pattern.  The value is moved from an FP register to an integer
             register through a 16-byte scratch area on the stack (4-byte
             FP store, then 4-byte integer load). */
2235      case Iop_ReinterpF32asI32: {
2236         /* I believe this generates correct code for both 32- and
2237            64-bit hosts. */
2238         PPCAMode *am_addr;
2239         HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
2240         HReg r_dst  = newVRegI(env);
2241
2242         sub_from_sp( env, 16 );     // Move SP down 16 bytes
2243         am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2244
2245         // store as F32
2246         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
2247                                        fr_src, am_addr ));
2248         // load as Ity_I32
2249         addInstr(env, PPCInstr_Load( 4, r_dst, am_addr, mode64 ));
2250
2251         add_to_sp( env, 16 );       // Reset SP
2252         return r_dst;
2253      }
2254      break;   /* not reached: the block above always returns */
2255
2256      case Iop_ReinterpD64asI64:
             /* Reinterpret a D64 (decimal FP) value as an I64, 64-bit host
                only: the value is moved from an FP register to an integer
                register through a 16-byte scratch area on the stack.  On a
                32-bit host the case is left unhandled. */
2257         if (mode64) {
2258            PPCAMode *am_addr;
2259            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2260            HReg r_dst  = newVRegI(env);
2261
2262            sub_from_sp( env, 16 );     // Move SP down 16 bytes
2263            am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
2264
2265            // store as D64
2266            addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
2267                                           fr_src, am_addr ));
2268            // load as Ity_I64
2269            addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
2270            add_to_sp( env, 16 );       // Reset SP
2271            return r_dst;
2272         }
2273         break;
2274
2275      case Iop_BCDtoDPB: {
2276         /* the following is only valid in 64 bit mode */
2277         if (!mode64) break;
2278
2279         PPCCondCode cc;
2280         UInt        argiregs;
2281         HReg        argregs[1];
2282         HReg        r_dst  = newVRegI(env);
2283         Int         argreg;
2284
2285         argiregs = 0;
2286         argreg = 0;
2287         argregs[0] = hregPPC_GPR3(mode64);
2288
2289         argiregs |= (1 << (argreg+3));
2290         addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2291                                     iselWordExpr_R(env, e->Iex.Unop.arg,
2292                                                    IEndianess) ) );
2293
2294         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2295         if (IEndianess == Iend_LE) {
2296             addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
2297                                          argiregs,
2298                                          mk_RetLoc_simple(RLPri_Int)) );
2299         } else {
2300             HWord*      fdescr;
2301             fdescr = (HWord*)h_calc_BCDtoDPB;
2302             addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2303                                          argiregs,
2304                                          mk_RetLoc_simple(RLPri_Int)) );
2305         }
2306
2307         addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2308         return r_dst;
2309      }
2310
2311      case Iop_DPBtoBCD: {
2312         /* the following is only valid in 64 bit mode */
2313         if (!mode64) break;
2314
2315         PPCCondCode cc;
2316         UInt        argiregs;
2317         HReg        argregs[1];
2318         HReg        r_dst  = newVRegI(env);
2319         Int         argreg;
2320
2321         argiregs = 0;
2322         argreg = 0;
2323         argregs[0] = hregPPC_GPR3(mode64);
2324
2325         argiregs |= (1 << (argreg+3));
2326         addInstr(env, mk_iMOVds_RR( argregs[argreg++],
2327                                     iselWordExpr_R(env, e->Iex.Unop.arg,
2328                                                    IEndianess) ) );
2329
2330         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
2331
2332        if (IEndianess == Iend_LE) {
2333            addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
2334                                         argiregs,
2335                                         mk_RetLoc_simple(RLPri_Int) ) );
2336        } else {
2337            HWord*      fdescr;
2338            fdescr = (HWord*)h_calc_DPBtoBCD;
2339            addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
2340                                         argiregs,
2341                                         mk_RetLoc_simple(RLPri_Int) ) );
2342         }
2343
2344         addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
2345         return r_dst;
2346      }
2347      case Iop_F32toF16x4: {
2348         HReg vdst = newVRegV(env);    /* V128 */
2349         HReg dst  = newVRegI(env);    /* I64*/
2350         HReg r0 = newVRegI(env);    /* I16*/
2351         HReg r1 = newVRegI(env);    /* I16*/
2352         HReg r2 = newVRegI(env);    /* I16*/
2353         HReg r3 = newVRegI(env);    /* I16*/
2354         HReg vsrc  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
2355         PPCAMode *am_off0, *am_off2, *am_off4, *am_off6, *am_off8;
2356         PPCAMode *am_off10, *am_off12, *am_off14;
2357         HReg r_aligned16;
2358
2359         sub_from_sp( env, 32 );     // Move SP down
2360
2361         /* issue instruction */
2362         addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, vdst, vsrc));
2363
2364         /* Get a  quadword aligned address within our stack space */
2365         r_aligned16 = get_sp_aligned16( env );
2366         am_off0  = PPCAMode_IR( 0, r_aligned16 );
2367         am_off2  = PPCAMode_IR( 2, r_aligned16 );
2368         am_off4  = PPCAMode_IR( 4, r_aligned16 );
2369         am_off6  = PPCAMode_IR( 6, r_aligned16 );
2370         am_off8  = PPCAMode_IR( 8, r_aligned16 );
2371         am_off10 = PPCAMode_IR( 10, r_aligned16 );
2372         am_off12 = PPCAMode_IR( 12, r_aligned16 );
2373         am_off14 = PPCAMode_IR( 14, r_aligned16 );
2374
2375         /* Store v128 result to stack. */
2376         addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, vdst, am_off0));
2377
2378         /* fetch four I16 from V128, store into contiguous I64 via stack,  */
2379         if (IEndianess == Iend_LE) {
2380            addInstr(env, PPCInstr_Load( 2, r3, am_off12, mode64));
2381            addInstr(env, PPCInstr_Load( 2, r2, am_off8, mode64));
2382            addInstr(env, PPCInstr_Load( 2, r1, am_off4, mode64));
2383            addInstr(env, PPCInstr_Load( 2, r0, am_off0, mode64));
2384         } else {
2385            addInstr(env, PPCInstr_Load( 2, r0, am_off14, mode64));
2386            addInstr(env, PPCInstr_Load( 2, r1, am_off10, mode64));
2387            addInstr(env, PPCInstr_Load( 2, r2, am_off6, mode64));
2388            addInstr(env, PPCInstr_Load( 2, r3, am_off2, mode64));
2389         }
2390
2391         /* store in contiguous 64-bit values */
2392         addInstr(env, PPCInstr_Store( 2, am_off6, r3, mode64));
2393         addInstr(env, PPCInstr_Store( 2, am_off4, r2, mode64));
2394         addInstr(env, PPCInstr_Store( 2, am_off2, r1, mode64));
2395         addInstr(env, PPCInstr_Store( 2, am_off0, r0, mode64));
2396
2397         /* Fetch I64 */
2398         addInstr(env, PPCInstr_Load(8, dst, am_off0, mode64));
2399
2400         add_to_sp( env, 32 );          // Reset SP
2401         return dst;
2402      }
2403
2404      default:
2405         break;
2406      }
2407
2408     switch (e->Iex.Unop.op) {
2409        case Iop_ExtractExpD64: {
2410
2411            HReg fr_dst = newVRegI(env);
2412            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
2413            HReg tmp    = newVRegF(env);
2414            PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2415            addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
2416
2417            // put the D64 result into a integer register
2418            sub_from_sp( env, 16 );
2419            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2420            addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2421            add_to_sp( env, 16 );
2422            return fr_dst;
2423         }
2424         case Iop_ExtractExpD128: {
2425            HReg fr_dst = newVRegI(env);
2426            HReg r_srcHi;
2427            HReg r_srcLo;
2428            HReg tmp    = newVRegF(env);
2429            PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
2430
2431            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
2432                           IEndianess);
2433            addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
2434                                                  r_srcHi, r_srcLo));
2435
2436            sub_from_sp( env, 16 );
2437            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
2438            addInstr(env, PPCInstr_Load(8, fr_dst, zero_r1, env->mode64));
2439            add_to_sp( env, 16 );
2440            return fr_dst;
2441         }
2442         default:
2443            break;
2444      }
2445
2446      break;
2447   }
2448
2449   /* --------- GET --------- */
2450   case Iex_Get: {
2451      if (ty == Ity_I8  || ty == Ity_I16 ||
2452          ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
2453         HReg r_dst = newVRegI(env);
2454         PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
2455                                          GuestStatePtr(mode64) );
2456         addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
2457                                      r_dst, am_addr, mode64 ));
2458         return r_dst;
2459      }
2460      break;
2461   }
2462
2463   case Iex_GetI: {
2464      PPCAMode* src_am
2465         = genGuestArrayOffset( env, e->Iex.GetI.descr,
2466                                e->Iex.GetI.ix, e->Iex.GetI.bias,
2467                                IEndianess );
2468      HReg r_dst = newVRegI(env);
2469      if (mode64 && ty == Ity_I64) {
2470         addInstr(env, PPCInstr_Load( toUChar(8),
2471                                      r_dst, src_am, mode64 ));
2472         return r_dst;
2473      }
2474      if ((!mode64) && ty == Ity_I32) {
2475         addInstr(env, PPCInstr_Load( toUChar(4),
2476                                      r_dst, src_am, mode64 ));
2477         return r_dst;
2478      }
2479      break;
2480   }
2481
2482   /* --------- CCALL --------- */
2483   case Iex_CCall: {
2484      vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
2485
2486      /* be very restrictive for now.  Only 32/64-bit ints allowed for
2487         args, and 32 bits or host machine word for return type. */
2488      if (!(ty == Ity_I32 || (mode64 && ty == Ity_I64)))
2489         goto irreducible;
2490
2491      /* Marshal args, do the call, clear stack. */
2492      UInt   addToSp = 0;
2493      RetLoc rloc    = mk_RetLoc_INVALID();
2494      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2495                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
2496                    IEndianess );
2497      vassert(is_sane_RetLoc(rloc));
2498      vassert(rloc.pri == RLPri_Int);
2499      vassert(addToSp == 0);
2500
2501      /* GPR3 now holds the destination address from Pin_Goto */
2502      HReg r_dst = newVRegI(env);
2503      addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
2504      return r_dst;
2505   }
2506
2507   /* --------- LITERAL --------- */
2508   /* 32/16/8-bit literals */
2509   case Iex_Const: {
2510      Long l;
2511      HReg r_dst = newVRegI(env);
2512      IRConst* con = e->Iex.Const.con;
2513      switch (con->tag) {
2514         case Ico_U64: if (!mode64) goto irreducible;
2515                       l = (Long)            con->Ico.U64; break;
2516         case Ico_U32: l = (Long)(Int)       con->Ico.U32; break;
2517         case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2518         case Ico_U8:  l = (Long)(Int)(Char )con->Ico.U8;  break;
2519         default:      vpanic("iselIntExpr_R.const(ppc)");
2520      }
2521      addInstr(env, PPCInstr_LI(r_dst, (ULong)l, mode64));
2522      return r_dst;
2523   }
2524
2525   /* --------- MULTIPLEX --------- */
2526   case Iex_ITE: { // VFD
2527      if ((ty == Ity_I8  || ty == Ity_I16 ||
2528           ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
2529          typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
2530         PPCRI* r1    = iselWordExpr_RI(env, e->Iex.ITE.iftrue, IEndianess);
2531         HReg   r0    = iselWordExpr_R(env, e->Iex.ITE.iffalse, IEndianess);
2532         HReg   r_dst = newVRegI(env);
2533         addInstr(env, mk_iMOVds_RR(r_dst,r0));
2534         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
2535         addInstr(env, PPCInstr_CMov(cc, r_dst, r1));
2536         return r_dst;
2537      }
2538      break;
2539   }
2540
2541   default:
2542      break;
2543   } /* switch (e->tag) */
2544
2545
2546   /* We get here if no pattern matched. */
2547 irreducible:
2548   ppIRExpr(e);
2549   vpanic("iselIntExpr_R(ppc): cannot reduce tree");
2550}
2551
2552
2553/*---------------------------------------------------------*/
2554/*--- ISEL: Integer expression auxiliaries              ---*/
2555/*---------------------------------------------------------*/
2556
2557/* --------------------- AMODEs --------------------- */
2558
2559/* Return an AMode which computes the value of the specified
2560   expression, possibly also adding insns to the code list as a
2561   result.  The expression may only be a word-size one.
2562*/
2563
/* Return nonzero iff 'u' is equal to the sign-extension of its own
   low 16 bits, that is, iff (viewed as a 32-bit two's-complement
   value) it lies in the range -32768 .. 32767 and can therefore be
   carried in a signed 16-bit immediate field.

   This is written as two unsigned range checks rather than as a
   shift-left / shift-right pair: converting an out-of-range unsigned
   value to a signed type and right-shifting a negative signed value
   are both implementation-defined in C, whereas these comparisons
   are fully defined. */
static Bool uInt_fits_in_16_bits ( UInt u )
{
   /* 0 .. 0x7FFF are the non-negative cases; 0xFFFF8000 .. 0xFFFFFFFF
      are the encodings of -32768 .. -1. */
   return u <= 0x7FFFU || u >= 0xFFFF8000U;
}
2573
/* Return nonzero iff 'u' is equal to the sign-extension of its own
   low 16 bits, that is, iff (viewed as a 64-bit two's-complement
   value) it lies in the range -32768 .. 32767.

   Expressed as unsigned range checks so as not to depend on the
   implementation-defined results of converting a large unsigned value
   to a signed type or of right-shifting a negative signed value. */
static Bool uLong_fits_in_16_bits ( ULong u )
{
   /* 0 .. 0x7FFF are the non-negative cases; the top 0x8000 values of
      the 64-bit range are the encodings of -32768 .. -1. */
   return u <= 0x7FFFULL || u >= 0xFFFFFFFFFFFF8000ULL;
}
2583
/* True iff the two least significant bits of 'u' are both zero, that
   is, iff 'u' is a multiple of 4. */
static Bool uLong_is_4_aligned ( ULong u )
{
   ULong low_two_bits = u & 3ULL;

   return toBool(low_two_bits == 0);
}
2588
2589static Bool sane_AMode ( ISelEnv* env, PPCAMode* am )
2590{
2591   Bool mode64 = env->mode64;
2592   switch (am->tag) {
2593   case Pam_IR:
2594      /* Using uInt_fits_in_16_bits in 64-bit mode seems a bit bogus,
2595         somehow, but I think it's OK. */
2596      return toBool( hregClass(am->Pam.IR.base) == HRcGPR(mode64) &&
2597                     hregIsVirtual(am->Pam.IR.base) &&
2598                     uInt_fits_in_16_bits(am->Pam.IR.index) );
2599   case Pam_RR:
2600      return toBool( hregClass(am->Pam.RR.base) == HRcGPR(mode64) &&
2601                     hregIsVirtual(am->Pam.RR.base) &&
2602                     hregClass(am->Pam.RR.index) == HRcGPR(mode64) &&
2603                     hregIsVirtual(am->Pam.RR.index) );
2604   default:
2605      vpanic("sane_AMode: unknown ppc amode tag");
2606   }
2607}
2608
2609static
2610PPCAMode* iselWordExpr_AMode ( ISelEnv* env, const IRExpr* e, IRType xferTy,
2611                               IREndness IEndianess )
2612{
2613   PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy, IEndianess);
2614   vassert(sane_AMode(env, am));
2615   return am;
2616}
2617
2618/* DO NOT CALL THIS DIRECTLY ! */
2619static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e,
2620                                          IRType xferTy, IREndness IEndianess )
2621{
2622   IRType ty = typeOfIRExpr(env->type_env,e);
2623
2624   if (env->mode64) {
2625
2626      /* If the data load/store type is I32 or I64, this amode might
2627         be destined for use in ld/ldu/lwa/st/stu.  In which case
2628         insist that if it comes out as an _IR, the immediate must
2629         have its bottom two bits be zero.  This does assume that for
2630         any other type (I8/I16/I128/F32/F64/V128) the amode will not
2631         be parked in any such instruction.  But that seems a
2632         reasonable assumption.  */
2633      Bool aligned4imm = toBool(xferTy == Ity_I32 || xferTy == Ity_I64);
2634
2635      vassert(ty == Ity_I64);
2636
2637      /* Add64(expr,i), where i == sign-extend of (i & 0xFFFF) */
2638      if (e->tag == Iex_Binop
2639          && e->Iex.Binop.op == Iop_Add64
2640          && e->Iex.Binop.arg2->tag == Iex_Const
2641          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
2642          && (aligned4imm  ? uLong_is_4_aligned(e->Iex.Binop.arg2
2643                                                 ->Iex.Const.con->Ico.U64)
2644                           : True)
2645          && uLong_fits_in_16_bits(e->Iex.Binop.arg2
2646                                    ->Iex.Const.con->Ico.U64)) {
2647         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
2648                             iselWordExpr_R(env, e->Iex.Binop.arg1,
2649                                            IEndianess) );
2650      }
2651
2652      /* Add64(expr,expr) */
2653      if (e->tag == Iex_Binop
2654          && e->Iex.Binop.op == Iop_Add64) {
2655         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
2656         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
2657         return PPCAMode_RR( r_idx, r_base );
2658      }
2659
2660   } else {
2661
2662      vassert(ty == Ity_I32);
2663
2664      /* Add32(expr,i), where i == sign-extend of (i & 0xFFFF) */
2665      if (e->tag == Iex_Binop
2666          && e->Iex.Binop.op == Iop_Add32
2667          && e->Iex.Binop.arg2->tag == Iex_Const
2668          && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
2669          && uInt_fits_in_16_bits(e->Iex.Binop.arg2
2670                                   ->Iex.Const.con->Ico.U32)) {
2671         return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
2672                             iselWordExpr_R(env, e->Iex.Binop.arg1,
2673                                            IEndianess) );
2674      }
2675
2676      /* Add32(expr,expr) */
2677      if (e->tag == Iex_Binop
2678          && e->Iex.Binop.op == Iop_Add32) {
2679         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
2680         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
2681         return PPCAMode_RR( r_idx, r_base );
2682      }
2683
2684   }
2685
2686   /* Doesn't match anything in particular.  Generate it into
2687      a register and use that. */
2688   return PPCAMode_IR( 0, iselWordExpr_R(env,e,IEndianess) );
2689}
2690
2691
2692/* --------------------- RH --------------------- */
2693
/* Compute an I8/I16/I32 (and I64, in 64-bit mode) into a RH
   (reg-or-halfword-immediate).  It's important to specify whether the
   immediate is to be regarded as signed or not.  If yes, this will
   never return -32768 as an immediate; this guarantees that all
   signed immediates that are returned can have their sign inverted if
   need be. */
2700
2701static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, const IRExpr* e,
2702                                IREndness IEndianess )
2703{
2704  PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e, IEndianess);
2705   /* sanity checks ... */
2706   switch (ri->tag) {
2707   case Prh_Imm:
2708      vassert(ri->Prh.Imm.syned == syned);
2709      if (syned)
2710         vassert(ri->Prh.Imm.imm16 != 0x8000);
2711      return ri;
2712   case Prh_Reg:
2713      vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2714      vassert(hregIsVirtual(ri->Prh.Reg.reg));
2715      return ri;
2716   default:
2717      vpanic("iselIntExpr_RH: unknown ppc RH tag");
2718   }
2719}
2720
2721/* DO NOT CALL THIS DIRECTLY ! */
2722static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, const IRExpr* e,
2723                                    IREndness IEndianess )
2724{
2725   ULong u;
2726   Long  l;
2727   IRType ty = typeOfIRExpr(env->type_env,e);
2728   vassert(ty == Ity_I8  || ty == Ity_I16 ||
2729           ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2730
2731   /* special case: immediate */
2732   if (e->tag == Iex_Const) {
2733      IRConst* con = e->Iex.Const.con;
2734      /* What value are we aiming to generate? */
2735      switch (con->tag) {
2736      /* Note: Not sign-extending - we carry 'syned' around */
2737      case Ico_U64: vassert(env->mode64);
2738                    u =              con->Ico.U64; break;
2739      case Ico_U32: u = 0xFFFFFFFF & con->Ico.U32; break;
2740      case Ico_U16: u = 0x0000FFFF & con->Ico.U16; break;
2741      case Ico_U8:  u = 0x000000FF & con->Ico.U8; break;
2742      default:      vpanic("iselIntExpr_RH.Iex_Const(ppch)");
2743      }
2744      l = (Long)u;
2745      /* Now figure out if it's representable. */
2746      if (!syned && u <= 65535) {
2747         return PPCRH_Imm(False/*unsigned*/, toUShort(u & 0xFFFF));
2748      }
2749      if (syned && l >= -32767 && l <= 32767) {
2750         return PPCRH_Imm(True/*signed*/, toUShort(u & 0xFFFF));
2751      }
2752      /* no luck; use the Slow Way. */
2753   }
2754
2755   /* default case: calculate into a register and return that */
2756   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2757}
2758
2759
2760/* --------------------- RIs --------------------- */
2761
/* Calculate an expression into a PPCRI operand.  As with
   iselWordExpr_R, the expression can have type 32, 16 or 8 bits, or,
   in 64-bit mode, 64 bits. */
2765
2766static PPCRI* iselWordExpr_RI ( ISelEnv* env, const IRExpr* e,
2767                                IREndness IEndianess )
2768{
2769   PPCRI* ri = iselWordExpr_RI_wrk(env, e, IEndianess);
2770   /* sanity checks ... */
2771   switch (ri->tag) {
2772   case Pri_Imm:
2773      return ri;
2774   case Pri_Reg:
2775      vassert(hregClass(ri->Pri.Reg) == HRcGPR(env->mode64));
2776      vassert(hregIsVirtual(ri->Pri.Reg));
2777      return ri;
2778   default:
2779      vpanic("iselIntExpr_RI: unknown ppc RI tag");
2780   }
2781}
2782
2783/* DO NOT CALL THIS DIRECTLY ! */
2784static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, const IRExpr* e,
2785                                    IREndness IEndianess )
2786{
2787   Long  l;
2788   IRType ty = typeOfIRExpr(env->type_env,e);
2789   vassert(ty == Ity_I8  || ty == Ity_I16 ||
2790           ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
2791
2792   /* special case: immediate */
2793   if (e->tag == Iex_Const) {
2794      IRConst* con = e->Iex.Const.con;
2795      switch (con->tag) {
2796      case Ico_U64: vassert(env->mode64);
2797                    l = (Long)            con->Ico.U64; break;
2798      case Ico_U32: l = (Long)(Int)       con->Ico.U32; break;
2799      case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
2800      case Ico_U8:  l = (Long)(Int)(Char )con->Ico.U8;  break;
2801      default:      vpanic("iselIntExpr_RI.Iex_Const(ppch)");
2802      }
2803      return PPCRI_Imm((ULong)l);
2804   }
2805
2806   /* default case: calculate into a register and return that */
2807   return PPCRI_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2808}
2809
2810
2811/* --------------------- RH5u --------------------- */
2812
2813/* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter
2814   being an immediate in the range 1 .. 31 inclusive.  Used for doing
2815   shift amounts.  Only used in 32-bit mode. */
2816
2817static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, const IRExpr* e,
2818                                  IREndness IEndianess )
2819{
2820   PPCRH* ri;
2821   vassert(!env->mode64);
2822   ri = iselWordExpr_RH5u_wrk(env, e, IEndianess);
2823   /* sanity checks ... */
2824   switch (ri->tag) {
2825   case Prh_Imm:
2826      vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 31);
2827      vassert(!ri->Prh.Imm.syned);
2828      return ri;
2829   case Prh_Reg:
2830      vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2831      vassert(hregIsVirtual(ri->Prh.Reg.reg));
2832      return ri;
2833   default:
2834      vpanic("iselIntExpr_RH5u: unknown ppc RI tag");
2835   }
2836}
2837
2838/* DO NOT CALL THIS DIRECTLY ! */
2839static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, const IRExpr* e,
2840                                      IREndness IEndianess )
2841{
2842   IRType ty = typeOfIRExpr(env->type_env,e);
2843   vassert(ty == Ity_I8);
2844
2845   /* special case: immediate */
2846   if (e->tag == Iex_Const
2847       && e->Iex.Const.con->tag == Ico_U8
2848       && e->Iex.Const.con->Ico.U8 >= 1
2849       && e->Iex.Const.con->Ico.U8 <= 31) {
2850      return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
2851   }
2852
2853   /* default case: calculate into a register and return that */
2854   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2855}
2856
2857
2858/* --------------------- RH6u --------------------- */
2859
2860/* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter
2861   being an immediate in the range 1 .. 63 inclusive.  Used for doing
2862   shift amounts.  Only used in 64-bit mode. */
2863
2864static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, const IRExpr* e,
2865                                  IREndness IEndianess )
2866{
2867   PPCRH* ri;
2868   vassert(env->mode64);
2869   ri = iselWordExpr_RH6u_wrk(env, e, IEndianess);
2870   /* sanity checks ... */
2871   switch (ri->tag) {
2872   case Prh_Imm:
2873      vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 63);
2874      vassert(!ri->Prh.Imm.syned);
2875      return ri;
2876   case Prh_Reg:
2877      vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
2878      vassert(hregIsVirtual(ri->Prh.Reg.reg));
2879      return ri;
2880   default:
2881      vpanic("iselIntExpr_RH6u: unknown ppc64 RI tag");
2882   }
2883}
2884
2885/* DO NOT CALL THIS DIRECTLY ! */
2886static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, const IRExpr* e,
2887                                      IREndness IEndianess )
2888{
2889   IRType ty = typeOfIRExpr(env->type_env,e);
2890   vassert(ty == Ity_I8);
2891
2892   /* special case: immediate */
2893   if (e->tag == Iex_Const
2894       && e->Iex.Const.con->tag == Ico_U8
2895       && e->Iex.Const.con->Ico.U8 >= 1
2896       && e->Iex.Const.con->Ico.U8 <= 63) {
2897      return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
2898   }
2899
2900   /* default case: calculate into a register and return that */
2901   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
2902}
2903
2904
2905/* --------------------- CONDCODE --------------------- */
2906
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
2910
2911static PPCCondCode iselCondCode ( ISelEnv* env, const IRExpr* e,
2912                                  IREndness IEndianess )
2913{
2914   /* Uh, there's nothing we can sanity check here, unfortunately. */
2915   return iselCondCode_wrk(env,e, IEndianess);
2916}
2917
2918/* DO NOT CALL THIS DIRECTLY ! */
2919static PPCCondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e,
2920                                      IREndness IEndianess )
2921{
2922   vassert(e);
2923   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2924
2925   /* Constant 1:Bit */
2926   if (e->tag == Iex_Const && e->Iex.Const.con->Ico.U1 == True) {
2927      // Make a compare that will always be true:
2928      HReg r_zero = newVRegI(env);
2929      addInstr(env, PPCInstr_LI(r_zero, 0, env->mode64));
2930      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2931                                 7/*cr*/, r_zero, PPCRH_Reg(r_zero)));
2932      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
2933   }
2934
2935   /* Not1(...) */
2936   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2937      /* Generate code for the arg, and negate the test condition */
2938      PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
2939      cond.test = invertCondTest(cond.test);
2940      return cond;
2941   }
2942
2943   /* --- patterns rooted at: 32to1 or 64to1 --- */
2944
2945   /* 32to1, 64to1 */
2946   if (e->tag == Iex_Unop &&
2947       (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
2948      HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2949      HReg tmp = newVRegI(env);
2950      /* could do better, probably -- andi. */
2951      addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
2952                                 src, PPCRH_Imm(False,1)));
2953      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2954                                 7/*cr*/, tmp, PPCRH_Imm(False,1)));
2955      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
2956   }
2957
2958   /* --- patterns rooted at: CmpNEZ8 --- */
2959
2960   /* CmpNEZ8(x) */
2961   /* Note this cloned as CmpNE8(x,0) below. */
2962   /* could do better -- andi. */
2963   if (e->tag == Iex_Unop
2964       && e->Iex.Unop.op == Iop_CmpNEZ8) {
2965      HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2966      HReg tmp = newVRegI(env);
2967      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
2968                                 PPCRH_Imm(False,0xFF)));
2969      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2970                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
2971      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2972   }
2973
2974   /* --- patterns rooted at: CmpNEZ32 --- */
2975
2976   /* CmpNEZ32(x) */
2977   if (e->tag == Iex_Unop
2978       && e->Iex.Unop.op == Iop_CmpNEZ32) {
2979      HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
2980      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
2981                                 7/*cr*/, r1, PPCRH_Imm(False,0)));
2982      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
2983   }
2984
2985   /* --- patterns rooted at: Cmp*32* --- */
2986
2987   /* Cmp*32*(x,y) */
2988   if (e->tag == Iex_Binop
2989       && (e->Iex.Binop.op == Iop_CmpEQ32
2990           || e->Iex.Binop.op == Iop_CmpNE32
2991           || e->Iex.Binop.op == Iop_CmpLT32S
2992           || e->Iex.Binop.op == Iop_CmpLT32U
2993           || e->Iex.Binop.op == Iop_CmpLE32S
2994           || e->Iex.Binop.op == Iop_CmpLE32U)) {
2995      Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
2996                    e->Iex.Binop.op == Iop_CmpLE32S);
2997      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
2998      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
2999      addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
3000                                 7/*cr*/, r1, ri2));
3001
3002      switch (e->Iex.Binop.op) {
3003      case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
3004      case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3005      case Iop_CmpLT32U: case Iop_CmpLT32S:
3006         return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
3007      case Iop_CmpLE32U: case Iop_CmpLE32S:
3008         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
3009      default: vpanic("iselCondCode(ppc): CmpXX32");
3010      }
3011   }
3012
3013   /* --- patterns rooted at: CmpNEZ64 --- */
3014
3015   /* CmpNEZ64 */
3016   if (e->tag == Iex_Unop
3017       && e->Iex.Unop.op == Iop_CmpNEZ64) {
3018      if (!env->mode64) {
3019         HReg hi, lo;
3020         HReg tmp = newVRegI(env);
3021         iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg, IEndianess );
3022         addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
3023         addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
3024                                    7/*cr*/, tmp,PPCRH_Imm(False,0)));
3025         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3026      } else {  // mode64
3027         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3028         addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
3029                                    7/*cr*/, r_src,PPCRH_Imm(False,0)));
3030         return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3031      }
3032   }
3033
3034   /* --- patterns rooted at: Cmp*64* --- */
3035
3036   /* Cmp*64*(x,y) */
3037   if (e->tag == Iex_Binop
3038       && (e->Iex.Binop.op == Iop_CmpEQ64
3039           || e->Iex.Binop.op == Iop_CmpNE64
3040           || e->Iex.Binop.op == Iop_CmpLT64S
3041           || e->Iex.Binop.op == Iop_CmpLT64U
3042           || e->Iex.Binop.op == Iop_CmpLE64S
3043           || e->Iex.Binop.op == Iop_CmpLE64U)) {
3044      Bool   syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
3045                      e->Iex.Binop.op == Iop_CmpLE64S);
3046      HReg    r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3047      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
3048      vassert(env->mode64);
3049      addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
3050                                 7/*cr*/, r1, ri2));
3051
3052      switch (e->Iex.Binop.op) {
3053      case Iop_CmpEQ64:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
3054      case Iop_CmpNE64:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3055      case Iop_CmpLT64U: return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
3056      case Iop_CmpLE64U: return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
3057      default: vpanic("iselCondCode(ppc): CmpXX64");
3058      }
3059   }
3060
3061   /* --- patterns rooted at: CmpNE8 --- */
3062
3063   /* CmpNE8(x,0) */
3064   /* Note this is a direct copy of CmpNEZ8 above. */
3065   /* could do better -- andi. */
3066   if (e->tag == Iex_Binop
3067       && e->Iex.Binop.op == Iop_CmpNE8
3068       && isZeroU8(e->Iex.Binop.arg2)) {
3069      HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3070      HReg tmp = newVRegI(env);
3071      addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
3072                                 PPCRH_Imm(False,0xFF)));
3073      addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3074                                 7/*cr*/, tmp, PPCRH_Imm(False,0)));
3075      return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
3076   }
3077
3078   /* var */
3079   if (e->tag == Iex_RdTmp) {
3080      HReg r_src      = lookupIRTemp(env, e->Iex.RdTmp.tmp);
3081      HReg src_masked = newVRegI(env);
3082      addInstr(env,
3083               PPCInstr_Alu(Palu_AND, src_masked,
3084                            r_src, PPCRH_Imm(False,1)));
3085      addInstr(env,
3086               PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
3087                            7/*cr*/, src_masked, PPCRH_Imm(False,1)));
3088      return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
3089   }
3090
3091   vex_printf("iselCondCode(ppc): No such tag(%u)\n", e->tag);
3092   ppIRExpr(e);
3093   vpanic("iselCondCode(ppc)");
3094}
3095
3096
3097/*---------------------------------------------------------*/
3098/*--- ISEL: Integer expressions (128 bit)               ---*/
3099/*---------------------------------------------------------*/
3100
3101/* 64-bit mode ONLY: compute a 128-bit value into a register pair,
3102   which is returned as the first two parameters.  As with
3103   iselWordExpr_R, these may be either real or virtual regs; in any
3104   case they must not be changed by subsequent code emitted by the
3105   caller.  */
3106
3107static void iselInt128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
3108                             const IRExpr* e, IREndness IEndianess )
3109{
3110   vassert(env->mode64);
3111   iselInt128Expr_wrk(rHi, rLo, env, e, IEndianess);
3112#  if 0
3113   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3114#  endif
3115   vassert(hregClass(*rHi) == HRcGPR(env->mode64));
3116   vassert(hregIsVirtual(*rHi));
3117   vassert(hregClass(*rLo) == HRcGPR(env->mode64));
3118   vassert(hregIsVirtual(*rLo));
3119}
3120
3121/* DO NOT CALL THIS DIRECTLY ! */
3122static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
3123                                 const IRExpr* e, IREndness IEndianess )
3124{
3125   Bool mode64 = env->mode64;
3126
3127   vassert(e);
3128   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3129
3130   /* read 128-bit IRTemp */
3131   if (e->tag == Iex_RdTmp) {
3132      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3133      return;
3134   }
3135
3136   /* 128-bit GET */
3137   if (e->tag == Iex_Get) {
3138      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3139                                       GuestStatePtr(mode64) );
3140      PPCAMode* am_addr4 = advance4(env, am_addr);
3141      HReg tLo = newVRegI(env);
3142      HReg tHi = newVRegI(env);
3143
3144      addInstr(env, PPCInstr_Load( 8, tHi, am_addr,  mode64));
3145      addInstr(env, PPCInstr_Load( 8, tLo, am_addr4, mode64));
3146      *rHi = tHi;
3147      *rLo = tLo;
3148      return;
3149   }
3150
3151   /* --------- BINARY ops --------- */
3152   if (e->tag == Iex_Binop) {
3153      switch (e->Iex.Binop.op) {
3154      /* 64 x 64 -> 128 multiply */
3155      case Iop_MullU64:
3156      case Iop_MullS64: {
3157         HReg     tLo     = newVRegI(env);
3158         HReg     tHi     = newVRegI(env);
3159         Bool     syned   = toBool(e->Iex.Binop.op == Iop_MullS64);
3160         HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3161         HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3162         addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3163                                     False/*lo64*/, False/*64bit mul*/,
3164                                     tLo, r_srcL, r_srcR));
3165         addInstr(env, PPCInstr_MulL(syned,
3166                                     True/*hi64*/, False/*64bit mul*/,
3167                                     tHi, r_srcL, r_srcR));
3168         *rHi = tHi;
3169         *rLo = tLo;
3170         return;
3171      }
3172
3173      /* 64HLto128(e1,e2) */
3174      case Iop_64HLto128:
3175         *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3176         *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3177         return;
3178      default:
3179         break;
3180      }
3181   } /* if (e->tag == Iex_Binop) */
3182
3183
3184   /* --------- UNARY ops --------- */
3185   if (e->tag == Iex_Unop) {
3186      switch (e->Iex.Unop.op) {
3187      default:
3188         break;
3189      }
3190   } /* if (e->tag == Iex_Unop) */
3191
3192   vex_printf("iselInt128Expr(ppc64): No such tag(%u)\n", e->tag);
3193   ppIRExpr(e);
3194   vpanic("iselInt128Expr(ppc64)");
3195}
3196
3197
3198/*---------------------------------------------------------*/
3199/*--- ISEL: Integer expressions (64 bit)                ---*/
3200/*---------------------------------------------------------*/
3201
3202/* 32-bit mode ONLY: compute a 128-bit value into a register quad */
3203static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi, HReg* rMedLo,
3204                                     HReg* rLo, ISelEnv* env, const IRExpr* e,
3205                                     IREndness IEndianess )
3206{
3207   vassert(!env->mode64);
3208   iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e, IEndianess);
3209#  if 0
3210   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3211#  endif
3212   vassert(hregClass(*rHi) == HRcInt32);
3213   vassert(hregIsVirtual(*rHi));
3214   vassert(hregClass(*rMedHi) == HRcInt32);
3215   vassert(hregIsVirtual(*rMedHi));
3216   vassert(hregClass(*rMedLo) == HRcInt32);
3217   vassert(hregIsVirtual(*rMedLo));
3218   vassert(hregClass(*rLo) == HRcInt32);
3219   vassert(hregIsVirtual(*rLo));
3220}
3221
3222static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
3223                                         HReg* rMedLo, HReg* rLo,
3224                                         ISelEnv* env, const IRExpr* e,
3225                                         IREndness IEndianess )
3226{
3227   vassert(e);
3228   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
3229
3230   /* read 128-bit IRTemp */
3231   if (e->tag == Iex_RdTmp) {
3232      lookupIRTempQuad( rHi, rMedHi, rMedLo, rLo, env, e->Iex.RdTmp.tmp);
3233      return;
3234   }
3235
3236   if (e->tag == Iex_Binop) {
3237
3238      IROp op_binop = e->Iex.Binop.op;
3239      switch (op_binop) {
3240      case Iop_64HLto128:
3241         iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1, IEndianess);
3242         iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2, IEndianess);
3243         return;
3244      default:
3245         vex_printf("iselInt128Expr_to_32x4_wrk: Binop case 0x%x not found\n",
3246                    op_binop);
3247         break;
3248      }
3249   }
3250
3251   vex_printf("iselInt128Expr_to_32x4_wrk: e->tag 0x%x not found\n", e->tag);
3252   return;
3253}
3254
3255/* 32-bit mode ONLY: compute a 64-bit value into a register pair,
3256   which is returned as the first two parameters.  As with
3257   iselIntExpr_R, these may be either real or virtual regs; in any
3258   case they must not be changed by subsequent code emitted by the
3259   caller.  */
3260
3261static void iselInt64Expr ( HReg* rHi, HReg* rLo,
3262                            ISelEnv* env, const IRExpr* e,
3263                            IREndness IEndianess )
3264{
3265   vassert(!env->mode64);
3266   iselInt64Expr_wrk(rHi, rLo, env, e, IEndianess);
3267#  if 0
3268   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3269#  endif
3270   vassert(hregClass(*rHi) == HRcInt32);
3271   vassert(hregIsVirtual(*rHi));
3272   vassert(hregClass(*rLo) == HRcInt32);
3273   vassert(hregIsVirtual(*rLo));
3274}
3275
3276/* DO NOT CALL THIS DIRECTLY ! */
3277static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
3278                                ISelEnv* env, const IRExpr* e,
3279                                IREndness IEndianess )
3280{
3281   vassert(e);
3282   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
3283
3284   /* 64-bit load */
3285   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
3286      HReg tLo    = newVRegI(env);
3287      HReg tHi    = newVRegI(env);
3288      HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr, IEndianess);
3289      vassert(!env->mode64);
3290      addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3291                                   tHi, PPCAMode_IR( 0, r_addr ),
3292                                   False/*32-bit insn please*/) );
3293      addInstr(env, PPCInstr_Load( 4/*byte-load*/,
3294                                   tLo, PPCAMode_IR( 4, r_addr ),
3295                                   False/*32-bit insn please*/) );
3296      *rHi = tHi;
3297      *rLo = tLo;
3298      return;
3299   }
3300
3301   /* 64-bit literal */
3302   if (e->tag == Iex_Const) {
3303      ULong w64 = e->Iex.Const.con->Ico.U64;
3304      UInt  wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
3305      UInt  wLo = ((UInt)w64) & 0xFFFFFFFF;
3306      HReg  tLo = newVRegI(env);
3307      HReg  tHi = newVRegI(env);
3308      vassert(e->Iex.Const.con->tag == Ico_U64);
3309      addInstr(env, PPCInstr_LI(tHi, (Long)(Int)wHi, False/*mode32*/));
3310      addInstr(env, PPCInstr_LI(tLo, (Long)(Int)wLo, False/*mode32*/));
3311      *rHi = tHi;
3312      *rLo = tLo;
3313      return;
3314   }
3315
3316   /* read 64-bit IRTemp */
3317   if (e->tag == Iex_RdTmp) {
3318      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3319      return;
3320   }
3321
3322   /* 64-bit GET */
3323   if (e->tag == Iex_Get) {
3324      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3325                                       GuestStatePtr(False/*mode32*/) );
3326      PPCAMode* am_addr4 = advance4(env, am_addr);
3327      HReg tLo = newVRegI(env);
3328      HReg tHi = newVRegI(env);
3329      addInstr(env, PPCInstr_Load( 4, tHi, am_addr,  False/*mode32*/ ));
3330      addInstr(env, PPCInstr_Load( 4, tLo, am_addr4, False/*mode32*/ ));
3331      *rHi = tHi;
3332      *rLo = tLo;
3333      return;
3334   }
3335
3336   /* --------- CCALL --------- */
3337   if(e->tag == Iex_CCall) {
3338      IRType ty = typeOfIRExpr(env->type_env,e);
3339      Bool mode64 = env->mode64;
3340
3341      vassert(ty == e->Iex.CCall.retty); /* well-formedness of IR */
3342
3343      /* be very restrictive for now.  Only 32-bit ints allowed for
3344         args, and 32 bits or host machine word for return type. */
3345      vassert(!(ty == Ity_I32 || (mode64 && ty == Ity_I64)));
3346
3347      /* Marshal args, do the call, clear stack. */
3348      UInt   addToSp = 0;
3349      RetLoc rloc    = mk_RetLoc_INVALID();
3350      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
3351                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
3352                    IEndianess );
3353      vassert(is_sane_RetLoc(rloc));
3354
3355      vassert(rloc.pri == RLPri_2Int);
3356      vassert(addToSp == 0);
3357
3358      /* GPR3 now holds the destination address from Pin_Goto */
3359      HReg r_dst = newVRegI(env);
3360      addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
3361      *rHi = r_dst;
3362      *rLo = r_dst;
3363      return;
3364   }
3365
3366   /* 64-bit ITE */
3367   if (e->tag == Iex_ITE) { // VFD
3368      HReg e0Lo, e0Hi, eXLo, eXHi;
3369      iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue, IEndianess);
3370      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse, IEndianess);
3371      HReg tLo = newVRegI(env);
3372      HReg tHi = newVRegI(env);
3373      addInstr(env, mk_iMOVds_RR(tHi,e0Hi));
3374      addInstr(env, mk_iMOVds_RR(tLo,e0Lo));
3375      PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
3376      addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(eXHi)));
3377      addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(eXLo)));
3378      *rHi = tHi;
3379      *rLo = tLo;
3380      return;
3381   }
3382
3383   /* --------- BINARY ops --------- */
3384   if (e->tag == Iex_Binop) {
3385      IROp op_binop = e->Iex.Binop.op;
3386      switch (op_binop) {
3387         /* 32 x 32 -> 64 multiply */
3388         case Iop_MullU32:
3389         case Iop_MullS32: {
3390            HReg     tLo     = newVRegI(env);
3391            HReg     tHi     = newVRegI(env);
3392            Bool     syned   = toBool(op_binop == Iop_MullS32);
3393            HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1,
3394                                              IEndianess);
3395            HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2,
3396                                              IEndianess);
3397            addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
3398                                        False/*lo32*/, True/*32bit mul*/,
3399                                        tLo, r_srcL, r_srcR));
3400            addInstr(env, PPCInstr_MulL(syned,
3401                                        True/*hi32*/, True/*32bit mul*/,
3402                                        tHi, r_srcL, r_srcR));
3403            *rHi = tHi;
3404            *rLo = tLo;
3405            return;
3406         }
3407
3408         /* Or64/And64/Xor64 */
3409         case Iop_Or64:
3410         case Iop_And64:
3411         case Iop_Xor64: {
3412            HReg xLo, xHi, yLo, yHi;
3413            HReg tLo = newVRegI(env);
3414            HReg tHi = newVRegI(env);
3415            PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
3416                          (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
3417            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3418            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3419            addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
3420            addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
3421            *rHi = tHi;
3422            *rLo = tLo;
3423            return;
3424         }
3425
3426         /* Add64 */
3427         case Iop_Add64: {
3428            HReg xLo, xHi, yLo, yHi;
3429            HReg tLo = newVRegI(env);
3430            HReg tHi = newVRegI(env);
3431            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
3432            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
3433            addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
3434                                            tLo, xLo, yLo));
3435            addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
3436                                            tHi, xHi, yHi));
3437            *rHi = tHi;
3438            *rLo = tLo;
3439            return;
3440         }
3441
3442         /* 32HLto64(e1,e2) */
3443         case Iop_32HLto64:
3444            *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
3445            *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3446            return;
3447
3448         /* F64toI64[S|U] */
3449         case Iop_F64toI64S: case Iop_F64toI64U: {
3450            HReg      tLo     = newVRegI(env);
3451            HReg      tHi     = newVRegI(env);
3452            HReg      r1      = StackFramePtr(env->mode64);
3453            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3454            PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3455            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2,
3456                                            IEndianess);
3457            HReg      ftmp    = newVRegF(env);
3458
3459            vassert(!env->mode64);
3460            /* Set host rounding mode */
3461            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3462
3463            sub_from_sp( env, 16 );
3464            addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
3465                                          (op_binop == Iop_F64toI64S) ? True : False,
3466                                          True, ftmp, fsrc));
3467            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3468            addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3469            addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3470            add_to_sp( env, 16 );
3471
3472            ///* Restore default FPU rounding. */
3473            //set_FPU_rounding_default( env );
3474            *rHi = tHi;
3475            *rLo = tLo;
3476            return;
3477         }
3478         case Iop_D64toI64S: {
3479            HReg      tLo     = newVRegI(env);
3480            HReg      tHi     = newVRegI(env);
3481            HReg      r1      = StackFramePtr(env->mode64);
3482            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3483            PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
3484            HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
3485            HReg tmp    = newVRegF(env);
3486
3487            vassert(!env->mode64);
3488            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3489            addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, tmp, fr_src));
3490
3491            sub_from_sp( env, 16 );
3492            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3493            addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3494            addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3495            add_to_sp( env, 16 );
3496            *rHi = tHi;
3497            *rLo = tLo;
3498            return;
3499         }
3500         case Iop_D128toI64S: {
3501            PPCFpOp fpop = Pfp_DCTFIXQ;
3502            HReg r_srcHi = newVRegF(env);
3503            HReg r_srcLo = newVRegF(env);
3504            HReg tLo     = newVRegI(env);
3505            HReg tHi     = newVRegI(env);
3506            HReg ftmp    = newVRegF(env);
3507            PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3508            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3509
3510            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3511            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
3512                           IEndianess);
3513            addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
3514
3515            // put the D64 result into an integer register pair
3516            sub_from_sp( env, 16 );
3517            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
3518            addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3519            addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3520            add_to_sp( env, 16 );
3521            *rHi = tHi;
3522            *rLo = tLo;
3523            return;
3524         }
3525         default:
3526            break;
3527      }
3528   } /* if (e->tag == Iex_Binop) */
3529
3530
3531   /* --------- UNARY ops --------- */
3532   if (e->tag == Iex_Unop) {
3533      switch (e->Iex.Unop.op) {
3534
3535      /* CmpwNEZ64(e) */
3536      case Iop_CmpwNEZ64: {
3537         HReg argHi, argLo;
3538         HReg tmp1  = newVRegI(env);
3539         HReg tmp2  = newVRegI(env);
3540         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3541         /* tmp1 = argHi | argLo */
3542         addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
3543         /* tmp2 = (tmp1 | -tmp1) >>s 31 */
3544         addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1));
3545         addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1)));
3546         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3547                                     tmp2, tmp2, PPCRH_Imm(False, 31)));
3548         *rHi = tmp2;
3549         *rLo = tmp2; /* yes, really tmp2 */
3550         return;
3551      }
3552
3553      /* Left64 */
3554      case Iop_Left64: {
3555         HReg argHi, argLo;
3556         HReg zero32 = newVRegI(env);
3557         HReg resHi  = newVRegI(env);
3558         HReg resLo  = newVRegI(env);
3559         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
3560         vassert(env->mode64 == False);
3561         addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
3562         /* resHi:resLo = - argHi:argLo */
3563         addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
3564                                         resLo, zero32, argLo ));
3565         addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
3566                                         resHi, zero32, argHi ));
3567         /* resHi:resLo |= srcHi:srcLo */
3568         addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
3569         addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
3570         *rHi = resHi;
3571         *rLo = resLo;
3572         return;
3573      }
3574
3575      /* 32Sto64(e) */
3576      case Iop_32Sto64: {
3577         HReg tHi = newVRegI(env);
3578         HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3579         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3580                                     tHi, src, PPCRH_Imm(False,31)));
3581         *rHi = tHi;
3582         *rLo = src;
3583         return;
3584      }
3585      case Iop_ExtractExpD64: {
3586         HReg tmp    = newVRegF(env);
3587         HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
3588         HReg      tLo     = newVRegI(env);
3589         HReg      tHi     = newVRegI(env);
3590         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3591         PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3592
3593         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
3594
3595         // put the D64 result into a integer register pair
3596         sub_from_sp( env, 16 );
3597         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3598         addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3599         addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3600         add_to_sp( env, 16 );
3601         *rHi = tHi;
3602         *rLo = tLo;
3603         return;
3604      }
3605      case Iop_ExtractExpD128: {
3606         HReg      r_srcHi;
3607         HReg      r_srcLo;
3608         HReg      tmp     = newVRegF(env);
3609         HReg      tLo     = newVRegI(env);
3610         HReg      tHi     = newVRegI(env);
3611         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3612         PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
3613
3614         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg, IEndianess);
3615         addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
3616                                                  r_srcHi, r_srcLo));
3617
3618         // put the D64 result into a integer register pair
3619         sub_from_sp( env, 16 );
3620         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, tmp, zero_r1));
3621         addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
3622         addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
3623         add_to_sp( env, 16 );
3624         *rHi = tHi;
3625         *rLo = tLo;
3626         return;
3627      }
3628
3629      /* 32Uto64(e) */
3630      case Iop_32Uto64: {
3631         HReg tHi = newVRegI(env);
3632         HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
3633         addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
3634         *rHi = tHi;
3635         *rLo = tLo;
3636         return;
3637      }
3638
3639      case Iop_128to64: {
3640         /* Narrow, return the low 64-bit half as a 32-bit
3641          * register pair */
3642         HReg r_Hi    = INVALID_HREG;
3643         HReg r_MedHi = INVALID_HREG;
3644         HReg r_MedLo = INVALID_HREG;
3645         HReg r_Lo    = INVALID_HREG;
3646
3647         iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3648                                env, e->Iex.Unop.arg, IEndianess);
3649         *rHi = r_MedLo;
3650         *rLo = r_Lo;
3651         return;
3652      }
3653
3654      case Iop_128HIto64: {
3655         /* Narrow, return the high 64-bit half as a 32-bit
3656          *  register pair */
3657         HReg r_Hi    = INVALID_HREG;
3658         HReg r_MedHi = INVALID_HREG;
3659         HReg r_MedLo = INVALID_HREG;
3660         HReg r_Lo    = INVALID_HREG;
3661
3662         iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
3663                                env, e->Iex.Unop.arg, IEndianess);
3664         *rHi = r_Hi;
3665         *rLo = r_MedHi;
3666         return;
3667      }
3668
3669      /* V128{HI}to64 */
3670      case Iop_V128HIto64:
3671      case Iop_V128to64: {
3672         HReg r_aligned16;
3673         Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
3674         HReg tLo = newVRegI(env);
3675         HReg tHi = newVRegI(env);
3676         HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
3677         PPCAMode *am_off0, *am_offLO, *am_offHI;
3678         sub_from_sp( env, 32 );     // Move SP down 32 bytes
3679
3680         // get a quadword aligned address within our stack space
3681         r_aligned16 = get_sp_aligned16( env );
3682         am_off0  = PPCAMode_IR( 0,     r_aligned16 );
3683         am_offHI = PPCAMode_IR( off,   r_aligned16 );
3684         am_offLO = PPCAMode_IR( off+4, r_aligned16 );
3685
3686         // store as Vec128
3687         addInstr(env,
3688                  PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
3689
3690         // load hi,lo words (of hi/lo half of vec) as Ity_I32's
3691         addInstr(env,
3692                  PPCInstr_Load( 4, tHi, am_offHI, False/*mode32*/ ));
3693         addInstr(env,
3694                  PPCInstr_Load( 4, tLo, am_offLO, False/*mode32*/ ));
3695
3696         add_to_sp( env, 32 );       // Reset SP
3697         *rHi = tHi;
3698         *rLo = tLo;
3699         return;
3700      }
3701
3702      /* could do better than this, but for now ... */
3703      case Iop_1Sto64: {
3704         HReg tLo = newVRegI(env);
3705         HReg tHi = newVRegI(env);
3706         PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
3707         addInstr(env, PPCInstr_Set(cond,tLo));
3708         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
3709                                     tLo, tLo, PPCRH_Imm(False,31)));
3710         addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
3711                                     tLo, tLo, PPCRH_Imm(False,31)));
3712         addInstr(env, mk_iMOVds_RR(tHi, tLo));
3713         *rHi = tHi;
3714         *rLo = tLo;
3715         return;
3716      }
3717
3718      case Iop_Not64: {
3719         HReg xLo, xHi;
3720         HReg tmpLo = newVRegI(env);
3721         HReg tmpHi = newVRegI(env);
3722         iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg, IEndianess);
3723         addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
3724         addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
3725         *rHi = tmpHi;
3726         *rLo = tmpLo;
3727         return;
3728      }
3729
3730      /* ReinterpF64asI64(e) */
3731      /* Given an IEEE754 double, produce an I64 with the same bit
3732         pattern. */
3733      case Iop_ReinterpF64asI64: {
3734         PPCAMode *am_addr0, *am_addr1;
3735         HReg fr_src  = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
3736         HReg r_dstLo = newVRegI(env);
3737         HReg r_dstHi = newVRegI(env);
3738
3739         sub_from_sp( env, 16 );     // Move SP down 16 bytes
3740         am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
3741         am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
3742
3743         // store as F64
3744         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
3745                                        fr_src, am_addr0 ));
3746
3747         // load hi,lo as Ity_I32's
3748         addInstr(env, PPCInstr_Load( 4, r_dstHi,
3749                                      am_addr0, False/*mode32*/ ));
3750         addInstr(env, PPCInstr_Load( 4, r_dstLo,
3751                                      am_addr1, False/*mode32*/ ));
3752         *rHi = r_dstHi;
3753         *rLo = r_dstLo;
3754
3755         add_to_sp( env, 16 );       // Reset SP
3756         return;
3757      }
3758
3759      case Iop_ReinterpD64asI64: {
3760         HReg fr_src  = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
3761         PPCAMode *am_addr0, *am_addr1;
3762         HReg r_dstLo = newVRegI(env);
3763         HReg r_dstHi = newVRegI(env);
3764
3765
3766         sub_from_sp( env, 16 );     // Move SP down 16 bytes
3767         am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
3768         am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
3769
3770         // store as D64
3771         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
3772                                        fr_src, am_addr0 ));
3773
3774         // load hi,lo as Ity_I32's
3775         addInstr(env, PPCInstr_Load( 4, r_dstHi,
3776                                      am_addr0, False/*mode32*/ ));
3777         addInstr(env, PPCInstr_Load( 4, r_dstLo,
3778                                      am_addr1, False/*mode32*/ ));
3779         *rHi = r_dstHi;
3780         *rLo = r_dstLo;
3781
3782         add_to_sp( env, 16 );       // Reset SP
3783
3784         return;
3785      }
3786
3787      case Iop_BCDtoDPB: {
3788         PPCCondCode cc;
3789         UInt        argiregs;
3790         HReg        argregs[2];
3791         Int         argreg;
3792         HReg        tLo = newVRegI(env);
3793         HReg        tHi = newVRegI(env);
3794         HReg        tmpHi;
3795         HReg        tmpLo;
3796         Bool        mode64 = env->mode64;
3797
3798         argregs[0] = hregPPC_GPR3(mode64);
3799         argregs[1] = hregPPC_GPR4(mode64);
3800
3801         argiregs = 0;
3802         argreg = 0;
3803
3804         iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess );
3805
3806         argiregs |= ( 1 << (argreg+3 ) );
3807         addInstr( env, mk_iMOVds_RR( argregs[argreg++], tmpHi ) );
3808
3809         argiregs |= ( 1 << (argreg+3 ) );
3810         addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
3811
3812         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
3813
3814         if (IEndianess == Iend_LE) {
3815             addInstr( env, PPCInstr_Call( cc, (Addr)h_calc_BCDtoDPB,
3816                                           argiregs,
3817                                           mk_RetLoc_simple(RLPri_2Int) ) );
3818         } else {
3819             Addr64 target;
3820             target = mode64 ? (Addr)h_calc_BCDtoDPB :
3821               toUInt( (Addr)h_calc_BCDtoDPB );
3822             addInstr( env, PPCInstr_Call( cc, target,
3823                                           argiregs,
3824                                           mk_RetLoc_simple(RLPri_2Int) ) );
3825         }
3826
3827         addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
3828         addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
3829
3830         *rHi = tHi;
3831         *rLo = tLo;
3832         return;
3833      }
3834
3835      case Iop_DPBtoBCD: {
3836         PPCCondCode cc;
3837         UInt        argiregs;
3838         HReg        argregs[2];
3839         Int         argreg;
3840         HReg        tLo = newVRegI(env);
3841         HReg        tHi = newVRegI(env);
3842         HReg        tmpHi;
3843         HReg        tmpLo;
3844         Bool        mode64 = env->mode64;
3845
3846         argregs[0] = hregPPC_GPR3(mode64);
3847         argregs[1] = hregPPC_GPR4(mode64);
3848
3849         argiregs = 0;
3850         argreg = 0;
3851
3852         iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess);
3853
3854         argiregs |= (1 << (argreg+3));
3855         addInstr(env, mk_iMOVds_RR( argregs[argreg++], tmpHi ));
3856
3857         argiregs |= (1 << (argreg+3));
3858         addInstr(env, mk_iMOVds_RR( argregs[argreg], tmpLo));
3859
3860         cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
3861
3862         if (IEndianess == Iend_LE) {
3863             addInstr(env, PPCInstr_Call( cc, (Addr)h_calc_DPBtoBCD,
3864                                          argiregs,
3865                                          mk_RetLoc_simple(RLPri_2Int) ) );
3866         } else {
3867             Addr64 target;
3868             target = mode64 ? (Addr)h_calc_DPBtoBCD :
3869               toUInt( (Addr)h_calc_DPBtoBCD );
3870             addInstr(env, PPCInstr_Call( cc, target, argiregs,
3871                                          mk_RetLoc_simple(RLPri_2Int) ) );
3872         }
3873
3874         addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
3875         addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
3876
3877         *rHi = tHi;
3878         *rLo = tLo;
3879         return;
3880      }
3881
3882      default:
3883         break;
3884      }
3885   } /* if (e->tag == Iex_Unop) */
3886
3887   vex_printf("iselInt64Expr(ppc): No such tag(%u)\n", e->tag);
3888   ppIRExpr(e);
3889   vpanic("iselInt64Expr(ppc)");
3890}
3891
3892
3893/*---------------------------------------------------------*/
3894/*--- ISEL: Floating point expressions (32 bit)         ---*/
3895/*---------------------------------------------------------*/
3896
3897/* Nothing interesting here; really just wrappers for
3898   64-bit stuff. */
3899
3900static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
3901{
3902  HReg r = iselFltExpr_wrk( env, e, IEndianess );
3903#  if 0
3904   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3905#  endif
3906   vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
3907   vassert(hregIsVirtual(r));
3908   return r;
3909}
3910
3911/* DO NOT CALL THIS DIRECTLY */
3912static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e,
3913                              IREndness IEndianess )
3914{
3915   Bool        mode64 = env->mode64;
3916
3917   IRType ty = typeOfIRExpr(env->type_env,e);
3918   vassert(ty == Ity_F32);
3919
3920   if (e->tag == Iex_RdTmp) {
3921      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3922   }
3923
3924   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
3925      PPCAMode* am_addr;
3926      HReg r_dst = newVRegF(env);
3927      vassert(e->Iex.Load.ty == Ity_F32);
3928      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/,
3929                                   IEndianess);
3930      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
3931      return r_dst;
3932   }
3933
3934   if (e->tag == Iex_Get) {
3935      HReg r_dst = newVRegF(env);
3936      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
3937                                       GuestStatePtr(env->mode64) );
3938      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4, r_dst, am_addr ));
3939      return r_dst;
3940   }
3941
3942   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
3943      /* This is quite subtle.  The only way to do the relevant
3944         truncation is to do a single-precision store and then a
3945         double precision load to get it back into a register.  The
3946         problem is, if the data is then written to memory a second
3947         time, as in
3948
3949            STbe(...) = TruncF64asF32(...)
3950
3951         then will the second truncation further alter the value?  The
3952         answer is no: flds (as generated here) followed by fsts
3953         (generated for the STbe) is the identity function on 32-bit
3954         floats, so we are safe.
3955
3956         Another upshot of this is that if iselStmt can see the
3957         entirety of
3958
3959            STbe(...) = TruncF64asF32(arg)
3960
3961         then it can short circuit having to deal with TruncF64asF32
3962         individually; instead just compute arg into a 64-bit FP
3963         register and do 'fsts' (since that itself does the
3964         truncation).
3965
3966         We generate pretty poor code here (should be ok both for
3967         32-bit and 64-bit mode); but it is expected that for the most
3968         part the latter optimisation will apply and hence this code
3969         will not often be used.
3970      */
3971      HReg      fsrc    = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
3972      HReg      fdst    = newVRegF(env);
3973      PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
3974
3975      sub_from_sp( env, 16 );
3976      // store as F32, hence truncating
3977      addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
3978                                     fsrc, zero_r1 ));
3979      // and reload.  Good huh?! (sigh)
3980      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
3981                                     fdst, zero_r1 ));
3982      add_to_sp( env, 16 );
3983      return fdst;
3984   }
3985
3986   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
3987      if (mode64) {
3988         HReg fdst = newVRegF(env);
3989         HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
3990         HReg r1   = StackFramePtr(env->mode64);
3991         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
3992
3993         /* Set host rounding mode */
3994         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
3995
3996         sub_from_sp( env, 16 );
3997
3998         addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
3999         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4000         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4001                                       False, False,
4002                                       fdst, fdst));
4003
4004         add_to_sp( env, 16 );
4005
4006         ///* Restore default FPU rounding. */
4007         //set_FPU_rounding_default( env );
4008         return fdst;
4009      } else {
4010         /* 32-bit mode */
4011         HReg fdst = newVRegF(env);
4012         HReg isrcHi, isrcLo;
4013         HReg r1   = StackFramePtr(env->mode64);
4014         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4015         PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4016
4017         iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2, IEndianess);
4018
4019         /* Set host rounding mode */
4020         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4021
4022         sub_from_sp( env, 16 );
4023
4024         addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4025         addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4026         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4027         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4028                                       False, False,
4029                                       fdst, fdst));
4030
4031         add_to_sp( env, 16 );
4032
4033         ///* Restore default FPU rounding. */
4034         //set_FPU_rounding_default( env );
4035         return fdst;
4036      }
4037
4038   }
4039
4040   vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
4041   ppIRExpr(e);
4042   vpanic("iselFltExpr_wrk(ppc)");
4043}
4044
4045
4046/*---------------------------------------------------------*/
4047/*--- ISEL: Floating point expressions (64 bit)         ---*/
4048/*---------------------------------------------------------*/
4049
4050/* Compute a 64-bit floating point value into a register, the identity
4051   of which is returned.  As with iselIntExpr_R, the reg may be either
4052   real or virtual; in any case it must not be changed by subsequent
4053   code emitted by the caller.  */
4054
4055/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
4056
4057    Type                  S (1 bit)   E (11 bits)   F (52 bits)
4058    ----                  ---------   -----------   -----------
4059    signalling NaN        u           2047 (max)    .0uuuuu---u
4060                                                    (with at least
4061                                                     one 1 bit)
4062    quiet NaN             u           2047 (max)    .1uuuuu---u
4063
4064    negative infinity     1           2047 (max)    .000000---0
4065
4066    positive infinity     0           2047 (max)    .000000---0
4067
4068    negative zero         1           0             .000000---0
4069
4070    positive zero         0           0             .000000---0
4071*/
4072
4073static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4074{
4075   HReg r = iselDblExpr_wrk( env, e, IEndianess );
4076#  if 0
4077   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4078#  endif
4079   vassert(hregClass(r) == HRcFlt64);
4080   vassert(hregIsVirtual(r));
4081   return r;
4082}
4083
4084/* DO NOT CALL THIS DIRECTLY */
4085static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e,
4086                              IREndness IEndianess )
4087{
4088   Bool mode64 = env->mode64;
4089   IRType ty = typeOfIRExpr(env->type_env,e);
4090   vassert(e);
4091   vassert(ty == Ity_F64);
4092
4093   if (e->tag == Iex_RdTmp) {
4094      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4095   }
4096
4097   /* --------- LITERAL --------- */
4098   if (e->tag == Iex_Const) {
4099      union { UInt u32x2[2]; ULong u64; Double f64; } u;
4100      vassert(sizeof(u) == 8);
4101      vassert(sizeof(u.u64) == 8);
4102      vassert(sizeof(u.f64) == 8);
4103      vassert(sizeof(u.u32x2) == 8);
4104
4105      if (e->Iex.Const.con->tag == Ico_F64) {
4106         u.f64 = e->Iex.Const.con->Ico.F64;
4107      }
4108      else if (e->Iex.Const.con->tag == Ico_F64i) {
4109         u.u64 = e->Iex.Const.con->Ico.F64i;
4110      }
4111      else
4112         vpanic("iselDblExpr(ppc): const");
4113
4114      if (!mode64) {
4115         HReg r_srcHi = newVRegI(env);
4116         HReg r_srcLo = newVRegI(env);
4117         addInstr(env, PPCInstr_LI(r_srcHi, u.u32x2[0], mode64));
4118         addInstr(env, PPCInstr_LI(r_srcLo, u.u32x2[1], mode64));
4119         return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4120      } else { // mode64
4121         HReg r_src = newVRegI(env);
4122         addInstr(env, PPCInstr_LI(r_src, u.u64, mode64));
4123         return mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
4124      }
4125   }
4126
4127   /* --------- LOAD --------- */
4128   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4129      HReg r_dst = newVRegF(env);
4130      PPCAMode* am_addr;
4131      vassert(e->Iex.Load.ty == Ity_F64);
4132      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/,
4133                                   IEndianess);
4134      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
4135      return r_dst;
4136   }
4137
4138   /* --------- GET --------- */
4139   if (e->tag == Iex_Get) {
4140      HReg r_dst = newVRegF(env);
4141      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4142                                       GuestStatePtr(mode64) );
4143      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ));
4144      return r_dst;
4145   }
4146
4147   /* --------- OPS --------- */
4148   if (e->tag == Iex_Qop) {
4149      PPCFpOp fpop = Pfp_INVALID;
4150      switch (e->Iex.Qop.details->op) {
4151         case Iop_MAddF64:    fpop = Pfp_MADDD; break;
4152         case Iop_MAddF64r32: fpop = Pfp_MADDS; break;
4153         case Iop_MSubF64:    fpop = Pfp_MSUBD; break;
4154         case Iop_MSubF64r32: fpop = Pfp_MSUBS; break;
4155         default: break;
4156      }
4157      if (fpop != Pfp_INVALID) {
4158         HReg r_dst  = newVRegF(env);
4159         HReg r_srcML  = iselDblExpr(env, e->Iex.Qop.details->arg2,
4160                                     IEndianess);
4161         HReg r_srcMR  = iselDblExpr(env, e->Iex.Qop.details->arg3,
4162                                     IEndianess);
4163         HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4,
4164                                     IEndianess);
4165         set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1, IEndianess );
4166         addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst,
4167                                               r_srcML, r_srcMR, r_srcAcc));
4168         return r_dst;
4169      }
4170   }
4171
4172   if (e->tag == Iex_Triop) {
4173      IRTriop *triop = e->Iex.Triop.details;
4174      PPCFpOp fpop = Pfp_INVALID;
4175      switch (triop->op) {
4176         case Iop_AddF64:    fpop = Pfp_ADDD; break;
4177         case Iop_SubF64:    fpop = Pfp_SUBD; break;
4178         case Iop_MulF64:    fpop = Pfp_MULD; break;
4179         case Iop_DivF64:    fpop = Pfp_DIVD; break;
4180         case Iop_AddF64r32: fpop = Pfp_ADDS; break;
4181         case Iop_SubF64r32: fpop = Pfp_SUBS; break;
4182         case Iop_MulF64r32: fpop = Pfp_MULS; break;
4183         case Iop_DivF64r32: fpop = Pfp_DIVS; break;
4184         default: break;
4185      }
4186      if (fpop != Pfp_INVALID) {
4187         HReg r_dst  = newVRegF(env);
4188         HReg r_srcL = iselDblExpr(env, triop->arg2, IEndianess);
4189         HReg r_srcR = iselDblExpr(env, triop->arg3, IEndianess);
4190         set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4191         addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
4192         return r_dst;
4193      }
4194   }
4195
4196   if (e->tag == Iex_Binop) {
4197      PPCFpOp fpop = Pfp_INVALID;
4198      switch (e->Iex.Binop.op) {
4199      case Iop_SqrtF64:   fpop = Pfp_SQRT;   break;
4200      default: break;
4201      }
4202      if (fpop == Pfp_SQRT) {
4203         HReg fr_dst = newVRegF(env);
4204         HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4205         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4206         addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4207         return fr_dst;
4208      }
4209   }
4210
4211   if (e->tag == Iex_Binop) {
4212
4213      if (e->Iex.Binop.op == Iop_F128toF64) {
4214         HReg fr_dst = newVRegF(env);
4215         HReg fr_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4216         HReg tmp = newVRegV(env);
4217         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4218         PPCAMode* eight_r1 = PPCAMode_IR( 8, StackFramePtr(env->mode64) );
4219         PPCFpOp fpop = Pfp_INVALID;
4220
4221         if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4222            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4223            fpop = Pfp_FPQTODRNDODD;
4224         } else {
4225            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4226            fpop = Pfp_FPQTOD;
4227         }
4228
4229         addInstr(env, PPCInstr_Fp128Unary(fpop, tmp, fr_src));
4230
4231         /* result is in a 128-bit vector register, move to 64-bit reg to
4232          * match the Iop specification.  The result will get moved back
4233          * to a 128-bit register and stored once the value is returned.
4234          */
4235         sub_from_sp( env, 16 );
4236         addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, zero_r1));
4237         if (IEndianess == Iend_LE)
4238            addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, eight_r1));
4239         else
4240            /* High 64-bits stored at lower address */
4241            addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, zero_r1));
4242
4243         add_to_sp( env, 16 );
4244
4245         return fr_dst;
4246      }
4247
4248      if (e->Iex.Binop.op == Iop_RoundF64toF32) {
4249         HReg r_dst = newVRegF(env);
4250         HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4251         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4252         addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
4253         //set_FPU_rounding_default( env );
4254         return r_dst;
4255      }
4256
4257      if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
4258         if (mode64) {
4259            HReg fdst = newVRegF(env);
4260            HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4261            HReg r1   = StackFramePtr(env->mode64);
4262            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4263
4264            /* Set host rounding mode */
4265            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4266
4267            sub_from_sp( env, 16 );
4268
4269            addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
4270            addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4271            addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4272                                          e->Iex.Binop.op == Iop_I64StoF64,
4273                                          True/*fdst is 64 bit*/,
4274                                          fdst, fdst));
4275
4276            add_to_sp( env, 16 );
4277
4278            ///* Restore default FPU rounding. */
4279            //set_FPU_rounding_default( env );
4280            return fdst;
4281         } else {
4282            /* 32-bit mode */
4283            HReg fdst = newVRegF(env);
4284            HReg isrcHi, isrcLo;
4285            HReg r1   = StackFramePtr(env->mode64);
4286            PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
4287            PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
4288
4289            iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2,
4290                          IEndianess);
4291
4292            /* Set host rounding mode */
4293            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4294
4295            sub_from_sp( env, 16 );
4296
4297            addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
4298            addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
4299            addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
4300            addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
4301                                          e->Iex.Binop.op == Iop_I64StoF64,
4302                                          True/*fdst is 64 bit*/,
4303                                          fdst, fdst));
4304
4305            add_to_sp( env, 16 );
4306
4307            ///* Restore default FPU rounding. */
4308            //set_FPU_rounding_default( env );
4309            return fdst;
4310         }
4311      }
4312
4313   }
4314
4315   if (e->tag == Iex_Unop) {
4316      PPCFpOp fpop = Pfp_INVALID;
4317      switch (e->Iex.Unop.op) {
4318         case Iop_NegF64:     fpop = Pfp_NEG; break;
4319         case Iop_AbsF64:     fpop = Pfp_ABS; break;
4320         case Iop_RSqrtEst5GoodF64:      fpop = Pfp_RSQRTE; break;
4321         case Iop_RoundF64toF64_NegINF:  fpop = Pfp_FRIM; break;
4322         case Iop_RoundF64toF64_PosINF:  fpop = Pfp_FRIP; break;
4323         case Iop_RoundF64toF64_NEAREST: fpop = Pfp_FRIN; break;
4324         case Iop_RoundF64toF64_ZERO:    fpop = Pfp_FRIZ; break;
4325         default: break;
4326      }
4327      if (fpop != Pfp_INVALID) {
4328         HReg fr_dst = newVRegF(env);
4329         HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4330         addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
4331         return fr_dst;
4332      }
4333   }
4334
4335   if (e->tag == Iex_Unop) {
4336      switch (e->Iex.Unop.op) {
4337      case Iop_F128HItoF64:
4338      case Iop_F128LOtoF64:
4339         {
4340            /* put upper/lower 64-bits of F128 into an F64. */
4341            HReg     r_aligned16;
4342            HReg     fdst = newVRegF(env);
4343            HReg     fsrc = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4344            PPCAMode *am_off0, *am_off8, *am_off_arg;
4345            sub_from_sp( env, 32 );     // Move SP down 32 bytes
4346
4347            // get a quadword aligned address within our stack space
4348            r_aligned16 = get_sp_aligned16( env );
4349            am_off0 = PPCAMode_IR( 0, r_aligned16 );
4350            am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
4351
4352            /* store 128-bit floating point value to memory, load low word
4353             * or high to 64-bit destination floating point register
4354             */
4355            addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, fsrc, am_off0));
4356            if (IEndianess == Iend_LE) {
4357               if (e->Iex.Binop.op == Iop_F128HItoF64)
4358                  am_off_arg = am_off8;
4359               else
4360                  am_off_arg = am_off0;
4361            } else {
4362               if (e->Iex.Binop.op == Iop_F128HItoF64)
4363                  am_off_arg = am_off0;
4364               else
4365                  am_off_arg = am_off8;
4366            }
4367            addInstr(env,
4368                    PPCInstr_FpLdSt( True /*load*/,
4369                                      8, fdst,
4370                                      am_off_arg ));
4371            add_to_sp( env, 32 );       // Reset SP
4372            return fdst;
4373         }
4374         case Iop_ReinterpI64asF64: {
4375            /* Given an I64, produce an IEEE754 double with the same
4376               bit pattern. */
4377            if (!mode64) {
4378               HReg r_srcHi, r_srcLo;
4379               iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
4380                               IEndianess);
4381               return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4382            } else {
4383               HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4384               return mk_LoadR64toFPR( env, r_src );
4385            }
4386         }
4387
4388         case Iop_F32toF64: {
4389            if (e->Iex.Unop.arg->tag == Iex_Unop &&
4390                     e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32 ) {
4391               e = e->Iex.Unop.arg;
4392
4393               HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4394               HReg fr_dst = newVRegF(env);
4395               PPCAMode *am_addr;
4396
4397               sub_from_sp( env, 16 );        // Move SP down 16 bytes
4398               am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4399
4400               // store src as Ity_I32's
4401               addInstr(env, PPCInstr_Store( 4, am_addr, src, env->mode64 ));
4402
4403               // load single precision float, but the end results loads into a
4404               // 64-bit FP register -- i.e., F64.
4405               addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, fr_dst, am_addr));
4406
4407               add_to_sp( env, 16 );          // Reset SP
4408               return fr_dst;
4409            }
4410
4411
4412            /* this is a no-op */
4413            HReg res = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
4414            return res;
4415         }
4416         default:
4417            break;
4418      }
4419   }
4420
4421   /* --------- MULTIPLEX --------- */
4422   if (e->tag == Iex_ITE) { // VFD
4423      if (ty == Ity_F64
4424          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
4425         HReg fr1    = iselDblExpr(env, e->Iex.ITE.iftrue, IEndianess);
4426         HReg fr0    = iselDblExpr(env, e->Iex.ITE.iffalse, IEndianess);
4427         HReg fr_dst = newVRegF(env);
4428         addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, fr0 ));
4429         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
4430         addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr1 ));
4431         return fr_dst;
4432      }
4433   }
4434
4435   vex_printf("iselDblExpr(ppc): No such tag(%u)\n", e->tag);
4436   ppIRExpr(e);
4437   vpanic("iselDblExpr_wrk(ppc)");
4438}
4439
4440static HReg iselDfp32Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
4441{
4442   HReg r = iselDfp32Expr_wrk( env, e, IEndianess );
4443   vassert(hregClass(r) == HRcFlt64);
4444   vassert( hregIsVirtual(r) );
4445   return r;
4446}
4447
4448/* DO NOT CALL THIS DIRECTLY */
4449static HReg iselDfp32Expr_wrk(ISelEnv* env, const IRExpr* e,
4450                              IREndness IEndianess)
4451{
4452   Bool mode64 = env->mode64;
4453   IRType ty = typeOfIRExpr( env->type_env, e );
4454
4455   vassert( e );
4456   vassert( ty == Ity_D32 );
4457
4458   /* --------- GET --------- */
4459   if (e->tag == Iex_Get) {
4460      HReg r_dst = newVRegF( env );
4461      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4462                                       GuestStatePtr(mode64) );
4463      addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4464      return r_dst;
4465   }
4466
4467   /* --------- LOAD --------- */
4468   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4469      PPCAMode* am_addr;
4470      HReg r_dst = newVRegF(env);
4471      vassert(e->Iex.Load.ty == Ity_D32);
4472      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/,
4473                                   IEndianess);
4474      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
4475      return r_dst;
4476   }
4477
4478   /* --------- OPS --------- */
4479   if (e->tag == Iex_Binop) {
4480      if (e->Iex.Binop.op == Iop_D64toD32) {
4481         HReg fr_dst = newVRegF(env);
4482         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4483         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4484         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DRSP, fr_dst, fr_src));
4485         return fr_dst;
4486      }
4487   }
4488
4489   ppIRExpr( e );
4490   vpanic( "iselDfp32Expr_wrk(ppc)" );
4491}
4492
4493static HReg iselFp128Expr( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
4494{
4495   HReg r = iselFp128Expr_wrk( env, e, IEndianess );
4496   vassert(hregClass(r) == HRcVec128);
4497   vassert(hregIsVirtual(r));
4498   return r;
4499}
4500
4501/* DO NOT CALL THIS DIRECTLY */
4502static HReg iselFp128Expr_wrk( ISelEnv* env, const IRExpr* e,
4503                               IREndness IEndianess)
4504{
4505   Bool mode64 = env->mode64;
4506   PPCFpOp fpop = Pfp_INVALID;
4507   IRType  ty = typeOfIRExpr(env->type_env,e);
4508
4509   vassert(e);
4510   vassert( ty == Ity_F128 );
4511
4512   /* read 128-bit IRTemp */
4513   if (e->tag == Iex_RdTmp) {
4514      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4515   }
4516
4517  if (e->tag == Iex_Get) {
4518      /* Guest state vectors are 16byte aligned,
4519         so don't need to worry here */
4520      HReg dst = newVRegV(env);
4521
4522      addInstr(env,
4523               PPCInstr_AvLdSt( True/*load*/, 16, dst,
4524                                PPCAMode_IR( e->Iex.Get.offset,
4525                                             GuestStatePtr(mode64) )));
4526      return dst;
4527   }
4528
4529   if (e->tag == Iex_Unop) {
4530      switch (e->Iex.Unop.op) {
4531      case Iop_TruncF128toI64S:
4532         fpop = Pfp_TRUNCFPQTOISD; goto do_Un_F128;
4533      case Iop_TruncF128toI32S:
4534         fpop = Pfp_TRUNCFPQTOISW; goto do_Un_F128;
4535      case Iop_TruncF128toI64U:
4536         fpop = Pfp_TRUNCFPQTOIUD; goto do_Un_F128;
4537      case Iop_TruncF128toI32U:
4538         fpop = Pfp_TRUNCFPQTOIUW; goto do_Un_F128;
4539
4540      do_Un_F128: {
4541         HReg r_dst = newVRegV(env);
4542         HReg r_src = iselFp128Expr(env, e->Iex.Unop.arg, IEndianess);
4543         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4544         return r_dst;
4545      }
4546
4547      case Iop_F64toF128: {
4548         fpop = Pfp_FPDTOQ;
4549         HReg r_dst = newVRegV(env);
4550         HReg r_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
4551         HReg v128tmp = newVRegV(env);
4552         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4553
4554         /* value is in 64-bit float reg, need to move to 128-bit vector reg */
4555         sub_from_sp( env, 16 );
4556         addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, r_src, zero_r1));
4557         addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, v128tmp, zero_r1));
4558         add_to_sp( env, 16 );
4559
4560         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, v128tmp));
4561         return r_dst;
4562      }
4563
4564      case Iop_I64StoF128:
4565         fpop = Pfp_IDSTOQ; goto do_Un_int_F128;
4566      case Iop_I64UtoF128:
4567         fpop = Pfp_IDUTOQ; goto do_Un_int_F128;
4568
4569      do_Un_int_F128: {
4570         HReg r_dst = newVRegV(env);
4571         HReg tmp = newVRegV(env);
4572         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4573         PPCAMode *am_offhi, *am_offlo;
4574         HReg r_aligned16;
4575
4576         /* source is in a 64-bit integer reg, move to 128-bit float reg
4577          * do this via the stack (easy, convenient, etc).
4578          */
4579         sub_from_sp( env, 32 );        // Move SP down
4580
4581         /* Get a quadword aligned address within our stack space */
4582         r_aligned16 = get_sp_aligned16( env );
4583
4584         am_offlo  = PPCAMode_IR( 0,  r_aligned16 );
4585         am_offhi  = PPCAMode_IR( 8,  r_aligned16 );
4586
4587         /* Inst only uses the upper 64-bit of the source */
4588         addInstr(env, PPCInstr_Load(8, r_src, am_offhi, mode64));
4589
4590         /* Fetch result back from stack. */
4591         addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16, tmp, am_offlo));
4592
4593         add_to_sp( env, 32 );          // Reset SP
4594
4595         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, tmp));
4596         return r_dst;
4597      }
4598
4599      default:
4600         break;
4601      } /* switch (e->Iex.Unop.op) */
4602   } /* if (e->tag == Iex_Unop) */
4603
4604   if (e->tag == Iex_Binop) {
4605      switch (e->Iex.Binop.op) {
4606
4607      case Iop_F64HLtoF128:
4608         {
4609            HReg dst    = newVRegV(env);
4610            HReg r_src_hi = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
4611            HReg r_src_lo = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4612            PPCAMode *am_offhi, *am_offlo;
4613            HReg r_aligned16;
4614
4615            /* do this via the stack (easy, convenient, etc) */
4616            sub_from_sp( env, 16 );        // Move SP down
4617
4618            /* Get a quadword aligned address within our stack space */
4619            r_aligned16 = get_sp_aligned16( env );
4620
4621            am_offlo  = PPCAMode_IR( 0,  r_aligned16 );
4622            am_offhi  = PPCAMode_IR( 8,  r_aligned16 );
4623
4624            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4625                                          r_src_lo, am_offlo));
4626            addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8,
4627                                          r_src_hi, am_offhi));
4628
4629            /* Fetch result back from stack. */
4630            addInstr(env, PPCInstr_AvLdSt(True/*load*/, 16,
4631                                          dst, am_offlo));
4632
4633            add_to_sp( env, 16 );          // Reset SP
4634            return dst;
4635         }
4636      case Iop_F128toI128S:
4637         {
4638            HReg dst    = newVRegV(env);
4639            HReg r_src  = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4640            PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4641            /* Note: rm is a set of three bit fields that specify the
4642             * rounding mode and which of the two instructions to issue.
4643             */
4644            addInstr(env, PPCInstr_AvBinaryInt(Pav_F128toI128S, dst,
4645                                               r_src, rm));
4646            return dst;
4647         }
4648      case Iop_RndF128:
4649         {
4650            HReg dst    = newVRegV(env);
4651            HReg r_src  = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4652            PPCRI* rm = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4653            /* Note: rm is a set of three bit fields that specify the
4654             * rounding mode and which of the two instructions to issue.
4655             */
4656            addInstr(env, PPCInstr_AvBinaryInt(Pav_ROUNDFPQ, dst,
4657                                               r_src, rm));
4658            return dst;
4659         }
4660      case Iop_SqrtF128:
4661         if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4662            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4663            fpop = Pfp_FPSQRTQRNDODD;
4664            goto do_Bin_F128;
4665         } else {
4666            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4667            fpop = Pfp_FPSQRTQ;
4668            goto do_Bin_F128;
4669         }
4670      case Iop_F128toF32:
4671         if (FPU_rounding_mode_isOdd(e->Iex.Binop.arg1)) {
4672            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4673            fpop = Pfp_FPQTOWRNDODD;
4674            goto do_Bin_F128;
4675         } else {
4676            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4677            fpop = Pfp_FPQTOW;
4678            goto do_Bin_F128;
4679         }
4680      do_Bin_F128: {
4681         HReg r_dst = newVRegV(env);
4682         HReg r_src = iselFp128Expr(env, e->Iex.Binop.arg2, IEndianess);
4683         addInstr(env, PPCInstr_Fp128Unary(fpop, r_dst, r_src));
4684         return r_dst;
4685      }
4686
4687      default:
4688         break;
4689      } /* switch (e->Iex.Binop.op) */
4690   } /* if (e->tag == Iex_Binop) */
4691
4692   if (e->tag == Iex_Triop) {
4693      IRTriop *triop = e->Iex.Triop.details;
4694
4695      switch (triop->op) {
4696      case Iop_AddF128:
4697         if (FPU_rounding_mode_isOdd(triop->arg1)) {
4698            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4699            fpop = Pfp_FPADDQRNDODD; goto do_Tri_F128;
4700         } else {
4701            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4702            fpop = Pfp_FPADDQ; goto do_Tri_F128;
4703         }
4704      case Iop_SubF128:
4705         if (FPU_rounding_mode_isOdd(triop->arg1)) {
4706            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4707            fpop = Pfp_FPSUBQRNDODD; goto do_Tri_F128;
4708         } else {
4709            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4710            fpop = Pfp_FPSUBQ; goto do_Tri_F128;
4711         }
4712      case Iop_MulF128:
4713         if (FPU_rounding_mode_isOdd(triop->arg1)) {
4714            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4715            fpop = Pfp_FPMULQRNDODD; goto do_Tri_F128;
4716         } else {
4717            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4718            fpop = Pfp_FPMULQ; goto do_Tri_F128;
4719         }
4720      case Iop_DivF128:
4721         if (FPU_rounding_mode_isOdd(triop->arg1)) {
4722            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4723            fpop = Pfp_FPDIVQRNDODD; goto do_Tri_F128;
4724         } else {
4725            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4726            fpop = Pfp_FPDIVQ; goto do_Tri_F128;
4727         }
4728      case Iop_MAddF128:
4729         if (FPU_rounding_mode_isOdd(triop->arg1)) {
4730            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4731            fpop = Pfp_FPMULADDQRNDODD; goto do_Tri_F128;
4732         } else {
4733            set_FPU_rounding_mode( env, triop->arg1, IEndianess );
4734            fpop = Pfp_FPMULADDQ; goto do_Tri_F128;
4735         }
4736
4737   do_Tri_F128: {
4738         HReg r_dst  = newVRegV(env);
4739         HReg r_srcL = iselFp128Expr(env, triop->arg2, IEndianess);
4740         HReg r_srcR = iselFp128Expr(env, triop->arg3, IEndianess);
4741
4742         addInstr(env, PPCInstr_Fp128Binary(fpop, r_dst, r_srcL, r_srcR));
4743         return r_dst;
4744      }
4745
4746      default:
4747         break;
4748      } /* switch (e->Iex.Triop.op) */
4749
4750   } /* if (e->tag == Iex_Trinop) */
4751
4752   if (e->tag == Iex_Qop) {
4753      IRQop *qop = e->Iex.Qop.details;
4754
4755      switch (qop->op) {
4756      case Iop_MAddF128:
4757         if (FPU_rounding_mode_isOdd(qop->arg1)) {
4758            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4759            fpop = Pfp_FPMULADDQRNDODD; goto do_Quad_F128;
4760         } else {
4761            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
4762            fpop = Pfp_FPMULADDQ; goto do_Quad_F128;
4763         }
4764      case Iop_MSubF128:
4765         if (FPU_rounding_mode_isOdd(qop->arg1)) {
4766            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4767            fpop = Pfp_FPMULSUBQRNDODD; goto do_Quad_F128;
4768         } else {
4769            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
4770            fpop = Pfp_FPMULSUBQ; goto do_Quad_F128;
4771         }
4772      case Iop_NegMAddF128:
4773         if (FPU_rounding_mode_isOdd(qop->arg1)) {
4774            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4775            fpop = Pfp_FPNEGMULADDQRNDODD; goto do_Quad_F128;
4776         } else {
4777            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
4778            fpop = Pfp_FPNEGMULADDQ; goto do_Quad_F128;
4779         }
4780      case Iop_NegMSubF128:
4781         if (FPU_rounding_mode_isOdd(qop->arg1)) {
4782            /* use rounding mode specified by RN. Issue inst with R0 = 0 */
4783            fpop = Pfp_FPNEGMULSUBQRNDODD; goto do_Quad_F128;
4784         } else {
4785            set_FPU_rounding_mode( env, qop->arg1, IEndianess );
4786            fpop = Pfp_FPNEGMULSUBQ; goto do_Quad_F128;
4787         }
4788
4789      do_Quad_F128: {
4790         HReg r_dst = iselFp128Expr(env, qop->arg3,
4791                                    IEndianess);
4792         HReg r_srcL = iselFp128Expr(env, qop->arg2,
4793                                     IEndianess);
4794         HReg r_srcR = iselFp128Expr(env, qop->arg4,
4795                                     IEndianess);
4796
4797         addInstr(env, PPCInstr_Fp128Trinary(fpop, r_dst, r_srcL, r_srcR));
4798         return r_dst;
4799         }
4800
4801      default:
4802         break;
4803      }
4804   }   /* if (e->tag == Iex_Qop) */
4805
4806   ppIRExpr( e );
4807   vpanic( "iselFp128Expr(ppc64)" );
4808}
4809
/* Select code for the Ity_D64 expression 'e' and return the register
   that holds its value.  The selection itself is done by
   iselDfp64Expr_wrk; this front end only checks the register that comes
   back: it must be of class HRcFlt64 and it must be a virtual register.
   'IEndianess' is passed through to the worker unchanged. */
static HReg iselDfp64Expr(ISelEnv* env, const IRExpr* e, IREndness IEndianess)
{
   HReg r = iselDfp64Expr_wrk( env, e, IEndianess );
   /* Sanity checks on the worker's result. */
   vassert(hregClass(r) == HRcFlt64);
   vassert( hregIsVirtual(r) );
   return r;
}
4817
4818/* DO NOT CALL THIS DIRECTLY */
4819static HReg iselDfp64Expr_wrk(ISelEnv* env, const IRExpr* e,
4820                              IREndness IEndianess)
4821{
4822   Bool mode64 = env->mode64;
4823   IRType ty = typeOfIRExpr( env->type_env, e );
4824   HReg r_dstHi, r_dstLo;
4825
4826   vassert( e );
4827   vassert( ty == Ity_D64 );
4828
4829   if (e->tag == Iex_RdTmp) {
4830      return lookupIRTemp( env, e->Iex.RdTmp.tmp );
4831   }
4832
4833   /* --------- GET --------- */
4834   if (e->tag == Iex_Get) {
4835      HReg r_dst = newVRegF( env );
4836      PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
4837                                       GuestStatePtr(mode64) );
4838      addInstr( env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ) );
4839      return r_dst;
4840   }
4841
4842   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
4843      PPCAMode* am_addr;
4844      HReg r_dst = newVRegF(env);
4845      vassert(e->Iex.Load.ty == Ity_D64);
4846      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/,
4847                                   IEndianess);
4848      addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
4849      return r_dst;
4850   }
4851
4852   /* --------- OPS --------- */
4853   if (e->tag == Iex_Qop) {
4854      HReg r_dst = newVRegF( env );
4855      return r_dst;
4856   }
4857
4858   if (e->tag == Iex_Unop) {
4859      HReg fr_dst = newVRegF(env);
4860      switch (e->Iex.Unop.op) {
4861      case Iop_ReinterpI64asD64: {
4862         /* Given an I64, produce an IEEE754 DFP with the same
4863               bit pattern. */
4864         if (!mode64) {
4865            HReg r_srcHi, r_srcLo;
4866            iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
4867                           IEndianess);
4868            return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
4869         } else {
4870            HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
4871            return mk_LoadR64toFPR( env, r_src );
4872         }
4873      }
4874      case Iop_D32toD64: {
4875         HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg, IEndianess);
4876         addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTDP, fr_dst, fr_src));
4877         return fr_dst;
4878      }
4879      case Iop_D128HItoD64:
4880         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
4881                         IEndianess );
4882         return r_dstHi;
4883      case Iop_D128LOtoD64:
4884         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
4885                         IEndianess );
4886         return r_dstLo;
4887      case Iop_InsertExpD64: {
4888         HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
4889         HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
4890
4891         addInstr(env, PPCInstr_Dfp64Binary(Pfp_DIEX, fr_dst, fr_srcL,
4892					    fr_srcR));
4893         return fr_dst;
4894       }
4895      default:
4896         vex_printf( "ERROR: iselDfp64Expr_wrk, UNKNOWN unop case %d\n",
4897                     (Int)e->Iex.Unop.op );
4898      }
4899   }
4900
4901   if (e->tag == Iex_Binop) {
4902      PPCFpOp fpop = Pfp_INVALID;
4903      HReg fr_dst = newVRegF(env);
4904
4905      switch (e->Iex.Binop.op) {
4906      case Iop_D128toD64:     fpop = Pfp_DRDPQ;  break;
4907      case Iop_D64toD32:      fpop = Pfp_DRSP;   break;
4908      case Iop_I64StoD64:     fpop = Pfp_DCFFIX; break;
4909      case Iop_RoundD64toInt: fpop = Pfp_DRINTN; break;
4910      default: break;
4911      }
4912      if (fpop == Pfp_DRDPQ) {
4913         HReg r_srcHi = newVRegF(env);
4914         HReg r_srcLo = newVRegF(env);
4915
4916         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4917         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
4918                        IEndianess);
4919         addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
4920         return fr_dst;
4921
4922      } else if (fpop == Pfp_DRINTN) {
4923         HReg fr_src = newVRegF(env);
4924         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
4925
4926         /* NOTE, this IOP takes a DFP value and rounds to the
4927          * neares floating point integer value, i.e. fractional part
4928          * is zero.  The result is a decimal floating point number.
4929          * the INT in the name is a bit misleading.
4930          */
4931         fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4932         addInstr(env, PPCInstr_DfpRound(fr_dst, fr_src, r_rmc));
4933         return fr_dst;
4934
4935      } else if (fpop == Pfp_DRSP) {
4936         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4937         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4938         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
4939         return fr_dst;
4940
4941      } else if (fpop == Pfp_DCFFIX) {
4942         HReg fr_src = newVRegF(env);
4943         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4944
4945         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
4946         sub_from_sp( env, 16 );
4947
4948         // put the I64 value into a floating point register
4949         if (mode64) {
4950           HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
4951
4952           addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
4953         } else {
4954            HReg tmpHi, tmpLo;
4955            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
4956
4957            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2,
4958                          IEndianess);
4959            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
4960            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
4961         }
4962
4963         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8,  fr_src, zero_r1));
4964         addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
4965         add_to_sp( env, 16 );
4966         return fr_dst;
4967      }
4968
4969      switch (e->Iex.Binop.op) {
4970      /* shift instructions D64, I32 -> D64 */
4971      case Iop_ShlD64: fpop = Pfp_DSCLI; break;
4972      case Iop_ShrD64: fpop = Pfp_DSCRI; break;
4973      default: break;
4974      }
4975      if (fpop != Pfp_INVALID) {
4976         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
4977         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
4978
4979         /* shift value must be an immediate value */
4980         vassert(shift->tag == Pri_Imm);
4981
4982         addInstr(env, PPCInstr_DfpShift(fpop, fr_dst, fr_src, shift));
4983         return fr_dst;
4984      }
4985
4986      switch (e->Iex.Binop.op) {
4987      case Iop_InsertExpD64:
4988         fpop = Pfp_DIEX;
4989         break;
4990      default: 	break;
4991      }
4992      if (fpop != Pfp_INVALID) {
4993         HReg fr_srcL = newVRegF(env);
4994         HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
4995         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
4996         sub_from_sp( env, 16 );
4997
4998         if (env->mode64) {
4999            // put the I64 value into a floating point reg
5000            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5001
5002            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5003         } else {
5004            // put the I64 register pair into a floating point reg
5005            HReg tmpHi;
5006            HReg tmpLo;
5007            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5008
5009            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1,
5010                          IEndianess);
5011            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*!mode64*/));
5012            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*!mode64*/));
5013         }
5014         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_srcL, zero_r1));
5015         addInstr(env, PPCInstr_Dfp64Binary(fpop, fr_dst, fr_srcL,
5016                                            fr_srcR));
5017         add_to_sp( env, 16 );
5018         return fr_dst;
5019      }
5020   }
5021
5022   if (e->tag == Iex_Triop) {
5023      IRTriop *triop = e->Iex.Triop.details;
5024      PPCFpOp fpop = Pfp_INVALID;
5025
5026      switch (triop->op) {
5027      case Iop_AddD64:
5028         fpop = Pfp_DFPADD;
5029         break;
5030      case Iop_SubD64:
5031         fpop = Pfp_DFPSUB;
5032         break;
5033      case Iop_MulD64:
5034         fpop = Pfp_DFPMUL;
5035         break;
5036      case Iop_DivD64:
5037         fpop = Pfp_DFPDIV;
5038         break;
5039      default:
5040         break;
5041      }
5042      if (fpop != Pfp_INVALID) {
5043         HReg r_dst = newVRegF( env );
5044         HReg r_srcL = iselDfp64Expr( env, triop->arg2, IEndianess );
5045         HReg r_srcR = iselDfp64Expr( env, triop->arg3, IEndianess );
5046
5047         set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5048         addInstr( env, PPCInstr_Dfp64Binary( fpop, r_dst, r_srcL, r_srcR ) );
5049         return r_dst;
5050      }
5051
5052      switch (triop->op) {
5053      case Iop_QuantizeD64:          fpop = Pfp_DQUA;  break;
5054      case Iop_SignificanceRoundD64: fpop = Pfp_RRDTR; break;
5055      default: break;
5056      }
5057      if (fpop == Pfp_DQUA) {
5058         HReg r_dst = newVRegF(env);
5059         HReg r_srcL = iselDfp64Expr(env, triop->arg2, IEndianess);
5060         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
5061         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
5062         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR,
5063                                            rmc));
5064         return r_dst;
5065
5066      } else if (fpop == Pfp_RRDTR) {
5067         HReg r_dst = newVRegF(env);
5068         HReg r_srcL = newVRegF(env);
5069         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
5070         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
5071         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5072         HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5073
5074         /* Move I8 to float register to issue instruction */
5075         sub_from_sp( env, 16 );
5076         if (mode64)
5077            addInstr(env, PPCInstr_Store(8, zero_r1, i8_val, True/*mode64*/));
5078         else
5079            addInstr(env, PPCInstr_Store(4, zero_r1, i8_val, False/*mode32*/));
5080
5081         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5082         add_to_sp( env, 16 );
5083
5084         // will set TE and RMC when issuing instruction
5085         addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR, rmc));
5086         return r_dst;
5087      }
5088   }
5089
5090   ppIRExpr( e );
5091   vpanic( "iselDfp64Expr_wrk(ppc)" );
5092}
5093
/* Select code for the Ity_D128 expression 'e'.  The value is returned as
   a pair of registers written to *rHi and *rLo by iselDfp128Expr_wrk,
   which does the actual selection.  This front end only asserts that
   both returned registers are virtual; unlike iselDfp64Expr it makes no
   check on their register class. */
static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, const IRExpr* e,
                           IREndness IEndianess)
{
   iselDfp128Expr_wrk( rHi, rLo, env, e, IEndianess );
   /* Sanity checks on the worker's results. */
   vassert( hregIsVirtual(*rHi) );
   vassert( hregIsVirtual(*rLo) );
}
5101
5102/* DO NOT CALL THIS DIRECTLY */
5103static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env,
5104                               const IRExpr* e, IREndness IEndianess)
5105{
5106   vassert( e );
5107   vassert( typeOfIRExpr(env->type_env,e) == Ity_D128 );
5108
5109   /* read 128-bit IRTemp */
5110   if (e->tag == Iex_RdTmp) {
5111      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp );
5112      return;
5113   }
5114
5115   if (e->tag == Iex_Unop) {
5116      HReg r_dstHi = newVRegF(env);
5117      HReg r_dstLo = newVRegF(env);
5118
5119      if (e->Iex.Unop.op == Iop_I64StoD128) {
5120         HReg fr_src = newVRegF(env);
5121         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5122
5123         // put the I64 value into a floating point reg
5124         if (env->mode64) {
5125            HReg tmp   = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5126            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5127         } else {
5128            HReg tmpHi, tmpLo;
5129            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5130
5131            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5132                          IEndianess);
5133            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5134            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5135         }
5136
5137         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_src, zero_r1));
5138         addInstr(env, PPCInstr_DfpI64StoD128(Pfp_DCFFIXQ, r_dstHi, r_dstLo,
5139                                              fr_src));
5140      }
5141
5142      if (e->Iex.Unop.op == Iop_D64toD128) {
5143         HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
5144
5145         /* Source is 64bit, result is 128 bit.  High 64bit source arg,
5146          * is ignored by the instruction.  Set high arg to r_src just
5147          * to meet the vassert tests.
5148          */
5149         addInstr(env, PPCInstr_Dfp128Unary(Pfp_DCTQPQ, r_dstHi, r_dstLo,
5150                                            r_src, r_src));
5151      }
5152      *rHi = r_dstHi;
5153      *rLo = r_dstLo;
5154      return;
5155   }
5156
5157   /* --------- OPS --------- */
5158   if (e->tag == Iex_Binop) {
5159      HReg r_srcHi;
5160      HReg r_srcLo;
5161
5162      switch (e->Iex.Binop.op) {
5163      case Iop_D64HLtoD128:
5164         r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1, IEndianess );
5165         r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2, IEndianess );
5166         *rHi = r_srcHi;
5167         *rLo = r_srcLo;
5168         return;
5169         break;
5170      case Iop_D128toD64: {
5171         PPCFpOp fpop = Pfp_DRDPQ;
5172         HReg fr_dst  = newVRegF(env);
5173
5174         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
5175         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5176                        IEndianess);
5177         addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
5178
5179         /* Need to meet the interface spec but the result is
5180          * just 64-bits so send the result back in both halfs.
5181          */
5182         *rHi = fr_dst;
5183         *rLo = fr_dst;
5184         return;
5185      }
5186      case Iop_ShlD128:
5187      case Iop_ShrD128: {
5188         HReg fr_dst_hi = newVRegF(env);
5189         HReg fr_dst_lo = newVRegF(env);
5190         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
5191         PPCFpOp fpop = Pfp_DSCLIQ;  /* fix later if necessary */
5192
5193         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1,
5194                        IEndianess);
5195
5196         if (e->Iex.Binop.op == Iop_ShrD128)
5197            fpop = Pfp_DSCRIQ;
5198
5199         addInstr(env, PPCInstr_DfpShift128(fpop, fr_dst_hi, fr_dst_lo,
5200                                            r_srcHi, r_srcLo, shift));
5201
5202         *rHi = fr_dst_hi;
5203         *rLo = fr_dst_lo;
5204         return;
5205      }
5206      case Iop_RoundD128toInt: {
5207         HReg r_dstHi = newVRegF(env);
5208         HReg r_dstLo = newVRegF(env);
5209         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
5210
5211         // will set R and RMC when issuing instruction
5212         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5213                        IEndianess);
5214
5215         addInstr(env, PPCInstr_DfpRound128(r_dstHi, r_dstLo,
5216                                            r_srcHi, r_srcLo, r_rmc));
5217         *rHi = r_dstHi;
5218         *rLo = r_dstLo;
5219         return;
5220      }
5221      case Iop_InsertExpD128: {
5222         HReg r_dstHi = newVRegF(env);
5223         HReg r_dstLo = newVRegF(env);
5224         HReg r_srcL  = newVRegF(env);
5225         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5226         r_srcHi = newVRegF(env);
5227         r_srcLo = newVRegF(env);
5228
5229         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
5230                        IEndianess);
5231
5232         /* Move I64 to float register to issue instruction */
5233         if (env->mode64) {
5234            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5235            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
5236         } else {
5237            HReg tmpHi, tmpLo;
5238            PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5239
5240            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
5241                          IEndianess);
5242            addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
5243            addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
5244         }
5245
5246         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_srcL, zero_r1));
5247         addInstr(env, PPCInstr_InsertExpD128(Pfp_DIEXQ,
5248                                              r_dstHi, r_dstLo,
5249                                              r_srcL, r_srcHi, r_srcLo));
5250         *rHi = r_dstHi;
5251         *rLo = r_dstLo;
5252         return;
5253      }
5254      default:
5255         vex_printf( "ERROR: iselDfp128Expr_wrk, UNKNOWN binop case %d\n",
5256                     (Int)e->Iex.Binop.op );
5257         break;
5258      }
5259   }
5260
5261   if (e->tag == Iex_Triop) {
5262      IRTriop *triop = e->Iex.Triop.details;
5263      PPCFpOp fpop = Pfp_INVALID;
5264      HReg r_dstHi = newVRegF(env);
5265      HReg r_dstLo = newVRegF(env);
5266
5267      switch (triop->op) {
5268      case Iop_AddD128:
5269         fpop = Pfp_DFPADDQ;
5270         break;
5271      case Iop_SubD128:
5272         fpop = Pfp_DFPSUBQ;
5273         break;
5274      case Iop_MulD128:
5275         fpop = Pfp_DFPMULQ;
5276         break;
5277      case Iop_DivD128:
5278         fpop = Pfp_DFPDIVQ;
5279         break;
5280      default:
5281         break;
5282      }
5283
5284      if (fpop != Pfp_INVALID) {
5285         HReg r_srcRHi = newVRegV( env );
5286         HReg r_srcRLo = newVRegV( env );
5287
5288         /* dst will be used to pass in the left operand and get the result. */
5289         iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2, IEndianess );
5290         iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3, IEndianess );
5291         set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
5292         addInstr( env,
5293                   PPCInstr_Dfp128Binary( fpop, r_dstHi, r_dstLo,
5294                                          r_srcRHi, r_srcRLo ) );
5295         *rHi = r_dstHi;
5296         *rLo = r_dstLo;
5297         return;
5298      }
5299      switch (triop->op) {
5300      case Iop_QuantizeD128:          fpop = Pfp_DQUAQ;  break;
5301      case Iop_SignificanceRoundD128: fpop = Pfp_DRRNDQ; break;
5302      default: break;
5303      }
5304      if (fpop == Pfp_DQUAQ) {
5305         HReg r_srcHi = newVRegF(env);
5306         HReg r_srcLo = newVRegF(env);
5307         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5308
5309         /* dst will be used to pass in the left operand and get the result */
5310         iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2, IEndianess);
5311         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5312
5313         // will set RMC when issuing instruction
5314         addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5315                                               r_srcHi, r_srcLo, rmc));
5316        *rHi = r_dstHi;
5317        *rLo = r_dstLo;
5318         return;
5319
5320      } else if (fpop == Pfp_DRRNDQ) {
5321         HReg r_srcHi = newVRegF(env);
5322         HReg r_srcLo = newVRegF(env);
5323         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
5324         PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
5325         PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
5326         HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
5327         HReg r_zero = newVRegI( env );
5328
5329         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
5330
5331         /* dst will be used to pass in the left operand and get the result */
5332         /* Move I8 to float register to issue instruction.  Note, the
5333          * instruction only looks at the bottom 6 bits so we really don't
5334          * have to clear the upper bits since the iselWordExpr_R sets the
5335          * bottom 8-bits.
5336          */
5337         sub_from_sp( env, 16 );
5338
5339         if (env->mode64)
5340            addInstr(env, PPCInstr_Store(4, four_r1, i8_val, True/*mode64*/));
5341         else
5342            addInstr(env, PPCInstr_Store(4, four_r1, i8_val, False/*mode32*/));
5343
5344         /* Have to write to the upper bits to ensure they have been
5345          * initialized. The instruction ignores all but the lower 6-bits.
5346          */
5347         addInstr( env, PPCInstr_LI( r_zero, 0, env->mode64 ) );
5348         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstHi, zero_r1));
5349         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dstLo, zero_r1));
5350
5351         add_to_sp( env, 16 );
5352
5353         // will set RMC when issuing instruction
5354         addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
5355                                               r_srcHi, r_srcLo, rmc));
5356         *rHi = r_dstHi;
5357         *rLo = r_dstLo;
5358         return;
5359      }
5360 }
5361
5362   ppIRExpr( e );
5363   vpanic( "iselDfp128Expr(ppc64)" );
5364}
5365
5366
5367/*---------------------------------------------------------*/
5368/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
5369/*---------------------------------------------------------*/
5370
/* Select code for the Ity_V128 expression 'e' and return the register
   that holds its value.  The selection itself is done by
   iselVecExpr_wrk; this front end only checks the register that comes
   back: it must be of class HRcVec128 and it must be a virtual register.
   'IEndianess' is passed through to the worker unchanged. */
static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e, IREndness IEndianess )
{
   HReg r = iselVecExpr_wrk( env, e, IEndianess );
   /* Disabled debugging aid: prints the expression that was selected. */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Sanity checks on the worker's result. */
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}
5381
5382/* DO NOT CALL THIS DIRECTLY */
5383static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
5384                              IREndness IEndianess )
5385{
5386   Bool mode64 = env->mode64;
5387   PPCAvOp op = Pav_INVALID;
5388   PPCAvFpOp fpop = Pavfp_INVALID;
5389   IRType  ty = typeOfIRExpr(env->type_env,e);
5390   vassert(e);
5391   vassert(ty == Ity_V128);
5392
5393   if (e->tag == Iex_RdTmp) {
5394      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5395   }
5396
5397   if (e->tag == Iex_Get) {
5398      /* Guest state vectors are 16byte aligned,
5399         so don't need to worry here */
5400      HReg dst = newVRegV(env);
5401      addInstr(env,
5402               PPCInstr_AvLdSt( True/*load*/, 16, dst,
5403                                PPCAMode_IR( e->Iex.Get.offset,
5404                                             GuestStatePtr(mode64) )));
5405      return dst;
5406   }
5407
5408   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
      /* Need to be able to do V128 unaligned loads. The BE unaligned load
       * can be accomplished using the following code sequence from the ISA.
       * It uses the lvx instruction to do two aligned loads and then
       * permutes the data so that the result is the same as if it had been
       * an unaligned load.
5414       *
5415       *   lvx  Vhi,0,Rb        # load MSQ, using the unaligned address in Rb
5416       *   lvsl Vp, 0,Rb        # Set permute control vector
5417       *   addi Rb,Rb,15        # Address of LSQ
5418       *   lvx  Vlo,0,Rb        # load LSQ
5419       *   vperm Vt,Vhi,Vlo,Vp  # align the data as requested
5420       */
5421
5422      HReg Vhi   = newVRegV(env);
5423      HReg Vlo   = newVRegV(env);
5424      HReg Vp    = newVRegV(env);
5425      HReg v_dst = newVRegV(env);
5426      HReg rB;
5427      HReg rB_plus_15 = newVRegI(env);
5428
5429      vassert(e->Iex.Load.ty == Ity_V128);
5430      rB = iselWordExpr_R( env, e->Iex.Load.addr, IEndianess );
5431
5432      // lvx  Vhi, 0, Rb
5433      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vhi,
5434                                     PPCAMode_IR(0, rB)) );
5435
5436      if (IEndianess == Iend_LE)
5437         // lvsr Vp, 0, Rb
5438         addInstr(env, PPCInstr_AvSh( False/*right shift*/, Vp,
5439                                      PPCAMode_IR(0, rB)) );
5440      else
5441         // lvsl Vp, 0, Rb
5442         addInstr(env, PPCInstr_AvSh( True/*left shift*/, Vp,
5443                                      PPCAMode_IR(0, rB)) );
5444
5445      // addi Rb_plus_15, Rb, 15
5446      addInstr(env, PPCInstr_Alu( Palu_ADD, rB_plus_15,
5447                                  rB, PPCRH_Imm(True, toUShort(15))) );
5448
5449      // lvx  Vlo, 0, Rb_plus_15
5450      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vlo,
5451                                     PPCAMode_IR(0, rB_plus_15)) );
5452
5453      if (IEndianess == Iend_LE)
         // vperm Vt, Vlo, Vhi, Vp   (source operands swapped for LE)
5455         addInstr(env, PPCInstr_AvPerm( v_dst, Vlo, Vhi, Vp ));
5456      else
5457         // vperm Vt, Vhi, Vlo, Vp
5458         addInstr(env, PPCInstr_AvPerm( v_dst, Vhi, Vlo, Vp ));
5459
5460      return v_dst;
5461   }
5462
5463   if (e->tag == Iex_Unop) {
5464      switch (e->Iex.Unop.op) {
5465
5466      case Iop_F16toF64x2:
5467         {
5468            HReg dst = newVRegV(env);
5469            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
            /* Note: PPC only converts the 16-bit value in the upper word
             *       to a 64-bit value stored in the upper word.  The
             *       contents of the lower word are undefined.
             */
5474            addInstr(env, PPCInstr_AvUnary(Pav_F16toF64x2, dst, arg));
5475            return dst;
5476         }
5477
5478      case Iop_F64toF16x2:
5479         {
5480            HReg dst = newVRegV(env);
5481            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
            /* Note: PPC only converts the 64-bit value in the upper 64 bits of
             * the V128 to a 16-bit value stored in the upper 64 bits of the
             * result V128.  The contents of the lower 64 bits are undefined.
             */
5486            addInstr(env, PPCInstr_AvUnary(Pav_F64toF16x2, dst, arg));
5487            return dst;
5488         }
5489
5490      case Iop_F16toF32x4:
5491         {
5492            HReg src = newVRegV(env);
5493            HReg dst = newVRegV(env);
5494            HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5495            PPCAMode *am_off0, *am_off8;
5496            HReg r_aligned16;
5497
5498            vassert(mode64);
5499            /* need to put I64 src into upper 64-bits of vector register,
5500               use stack */
5501            sub_from_sp( env, 32 );     // Move SP down
5502
5503            /* Get a quadword aligned address within our stack space */
5504            r_aligned16 = get_sp_aligned16( env );
5505            am_off0  = PPCAMode_IR( 0, r_aligned16 );
5506            am_off8  = PPCAMode_IR( 8, r_aligned16 );
5507
5508            /* Store I64 to stack */
5509
5510            if (IEndianess == Iend_LE) {
5511               addInstr(env, PPCInstr_Store( 8, am_off8, arg, mode64 ));
5512            } else {
5513               addInstr(env, PPCInstr_Store( 8, am_off0, arg, mode64 ));
5514            }
5515
5516            /* Fetch new v128 src back from stack. */
5517            addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, src, am_off0));
5518
5519            /* issue instruction */
5520            addInstr(env, PPCInstr_AvUnary(Pav_F16toF32x4, dst, src));
5521            add_to_sp( env, 32 );          // Reset SP
5522
5523            return dst;
5524         }
5525
5526      case Iop_F32toF16x4:
5527         {
5528            HReg dst = newVRegI(env);
5529            HReg tmp = newVRegV(env);
5530            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5531            PPCAMode *am_off0, *am_off8;
5532            HReg r_aligned16;
5533
5534            /* Instruction returns a V128, the Iop_F32toF16x4 needs to return
5535             * I64.  Move the upper 64-bits from the instruction to an I64 via
5536             * the stack and return it.
5537             */
5538            sub_from_sp( env, 32 );     // Move SP down
5539
5540            addInstr(env, PPCInstr_AvUnary(Pav_F32toF16x4, tmp, arg));
5541
5542            /* Get a quadword aligned address within our stack space */
5543            r_aligned16 = get_sp_aligned16( env );
5544            am_off0  = PPCAMode_IR( 0, r_aligned16 );
5545            am_off8  = PPCAMode_IR( 8, r_aligned16 );
5546
5547            /* Store v128 tmp to stack. */
5548            addInstr(env, PPCInstr_AvLdSt(False/*store*/, 16, tmp, am_off0));
5549
5550            /* Fetch I64 from stack */
5551            if (IEndianess == Iend_LE) {
5552               addInstr(env, PPCInstr_Load( 8, dst, am_off8, mode64 ));
5553            } else {
5554               addInstr(env, PPCInstr_Load( 8, dst, am_off0, mode64 ));
5555            }
5556
5557            add_to_sp( env, 32 );          // Reset SP
5558            return dst;
5559         }
5560
5561      case Iop_NotV128: {
5562         HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5563         HReg dst = newVRegV(env);
5564         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
5565         return dst;
5566      }
5567
5568      case Iop_CmpNEZ8x16: {
5569         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5570         HReg zero = newVRegV(env);
5571         HReg dst  = newVRegV(env);
5572         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5573         addInstr(env, PPCInstr_AvBin8x16(Pav_CMPEQU, dst, arg, zero));
5574         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5575         return dst;
5576      }
5577
5578      case Iop_CmpNEZ16x8: {
5579         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5580         HReg zero = newVRegV(env);
5581         HReg dst  = newVRegV(env);
5582         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5583         addInstr(env, PPCInstr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
5584         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5585         return dst;
5586      }
5587
5588      case Iop_CmpNEZ32x4: {
5589         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5590         HReg zero = newVRegV(env);
5591         HReg dst  = newVRegV(env);
5592         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5593         addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
5594         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5595         return dst;
5596      }
5597
5598      case Iop_CmpNEZ64x2: {
5599         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5600         HReg zero = newVRegV(env);
5601         HReg dst  = newVRegV(env);
5602         addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
5603         addInstr(env, PPCInstr_AvBin64x2(Pav_CMPEQU, dst, arg, zero));
5604         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5605         return dst;
5606      }
5607
5608      case Iop_RecipEst32Fx4: fpop = Pavfp_RCPF;    goto do_32Fx4_unary;
5609      case Iop_RSqrtEst32Fx4: fpop = Pavfp_RSQRTF;  goto do_32Fx4_unary;
5610      case Iop_I32UtoFx4:     fpop = Pavfp_CVTU2F;  goto do_32Fx4_unary;
5611      case Iop_I32StoFx4:     fpop = Pavfp_CVTS2F;  goto do_32Fx4_unary;
5612      case Iop_QFtoI32Ux4_RZ: fpop = Pavfp_QCVTF2U; goto do_32Fx4_unary;
5613      case Iop_QFtoI32Sx4_RZ: fpop = Pavfp_QCVTF2S; goto do_32Fx4_unary;
5614      case Iop_RoundF32x4_RM: fpop = Pavfp_ROUNDM;  goto do_32Fx4_unary;
5615      case Iop_RoundF32x4_RP: fpop = Pavfp_ROUNDP;  goto do_32Fx4_unary;
5616      case Iop_RoundF32x4_RN: fpop = Pavfp_ROUNDN;  goto do_32Fx4_unary;
5617      case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ;  goto do_32Fx4_unary;
5618      do_32Fx4_unary:
5619      {
5620         HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5621         HReg dst = newVRegV(env);
5622         addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
5623         return dst;
5624      }
5625
5626      case Iop_32UtoV128: {
5627         HReg r_aligned16, r_zeros;
5628         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
5629         HReg   dst = newVRegV(env);
5630         PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
5631         sub_from_sp( env, 32 );     // Move SP down
5632
5633         /* Get a quadword aligned address within our stack space */
5634         r_aligned16 = get_sp_aligned16( env );
5635         am_off0  = PPCAMode_IR( 0,  r_aligned16 );
5636         am_off4  = PPCAMode_IR( 4,  r_aligned16 );
5637         am_off8  = PPCAMode_IR( 8,  r_aligned16 );
5638         am_off12 = PPCAMode_IR( 12, r_aligned16 );
5639
5640         /* Store zeros */
5641         r_zeros = newVRegI(env);
5642         addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
5643         if (IEndianess == Iend_LE)
5644            addInstr(env, PPCInstr_Store( 4, am_off0, r_src, mode64 ));
5645         else
5646            addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
5647         addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
5648         addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
5649
5650         /* Store r_src in low word of quadword-aligned mem */
5651         if (IEndianess == Iend_LE)
5652            addInstr(env, PPCInstr_Store( 4, am_off12, r_zeros, mode64 ));
5653         else
5654            addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
5655
5656         /* Load word into low word of quadword vector reg */
5657         if (IEndianess == Iend_LE)
5658            addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off0 ));
5659         else
5660            addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
5661
5662         add_to_sp( env, 32 );       // Reset SP
5663         return dst;
5664      }
5665
5666      case Iop_Dup8x16:
5667      case Iop_Dup16x8:
5668      case Iop_Dup32x4:
5669         return mk_AvDuplicateRI(env, e->Iex.Unop.arg, IEndianess);
5670
5671      case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
5672      do_AvCipherV128Un: {
5673         HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5674         HReg dst = newVRegV(env);
5675         addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
5676         return dst;
5677      }
5678
5679      case Iop_Clz8x16: op = Pav_ZEROCNTBYTE;   goto do_zerocnt;
5680      case Iop_Clz16x8: op = Pav_ZEROCNTHALF;   goto do_zerocnt;
5681      case Iop_Clz32x4: op = Pav_ZEROCNTWORD;   goto do_zerocnt;
5682      case Iop_Clz64x2: op = Pav_ZEROCNTDBL;    goto do_zerocnt;
5683      case Iop_Ctz8x16: op = Pav_TRAILINGZEROCNTBYTE; goto do_zerocnt;
5684      case Iop_Ctz16x8: op = Pav_TRAILINGZEROCNTHALF; goto do_zerocnt;
5685      case Iop_Ctz32x4: op = Pav_TRAILINGZEROCNTWORD; goto do_zerocnt;
5686      case Iop_Ctz64x2: op = Pav_TRAILINGZEROCNTDBL;  goto do_zerocnt;
5687      case Iop_PwBitMtxXpose64x2: op = Pav_BITMTXXPOSE;  goto do_zerocnt;
5688      do_zerocnt:
5689      {
5690        HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5691        HReg dst = newVRegV(env);
5692        addInstr(env, PPCInstr_AvUnary(op, dst, arg));
5693        return dst;
5694      }
5695
5696      /* BCD Iops */
5697      case Iop_BCD128toI128S:
5698         {
5699            HReg dst  = newVRegV(env);
5700            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5701            addInstr(env, PPCInstr_AvUnary( Pav_BCD128toI128S, dst, arg ) );
5702            return dst;
5703         }
5704
5705      case Iop_MulI128by10:       op = Pav_MulI128by10;      goto do_MulI128;
5706      case Iop_MulI128by10Carry:  op = Pav_MulI128by10Carry; goto do_MulI128;
5707      do_MulI128: {
5708            HReg dst = newVRegV(env);
5709            HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
5710            addInstr(env, PPCInstr_AvUnary(op, dst, arg));
5711            return dst;
5712         }
5713
5714      default:
5715         break;
5716      } /* switch (e->Iex.Unop.op) */
5717   } /* if (e->tag == Iex_Unop) */
5718
5719   if (e->tag == Iex_Binop) {
5720      switch (e->Iex.Binop.op) {
5721
5722      case Iop_64HLtoV128: {
5723         if (!mode64) {
5724            HReg     r3, r2, r1, r0, r_aligned16;
5725            PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
5726            HReg     dst = newVRegV(env);
5727            /* do this via the stack (easy, convenient, etc) */
5728            sub_from_sp( env, 32 );        // Move SP down
5729
5730            // get a quadword aligned address within our stack space
5731            r_aligned16 = get_sp_aligned16( env );
5732            am_off0  = PPCAMode_IR( 0,  r_aligned16 );
5733            am_off4  = PPCAMode_IR( 4,  r_aligned16 );
5734            am_off8  = PPCAMode_IR( 8,  r_aligned16 );
5735            am_off12 = PPCAMode_IR( 12, r_aligned16 );
5736
5737            /* Do the less significant 64 bits */
5738            iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2, IEndianess);
5739            addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
5740            addInstr(env, PPCInstr_Store( 4, am_off8,  r1, mode64 ));
5741            /* Do the more significant 64 bits */
5742            iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1, IEndianess);
5743            addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
5744            addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
5745
5746            /* Fetch result back from stack. */
5747            addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
5748
5749            add_to_sp( env, 32 );          // Reset SP
5750            return dst;
5751         } else {
5752            HReg     rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
5753            HReg     rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
5754            HReg     dst = newVRegV(env);
5755            HReg     r_aligned16;
5756            PPCAMode *am_off0, *am_off8;
5757            /* do this via the stack (easy, convenient, etc) */
5758            sub_from_sp( env, 32 );        // Move SP down
5759
5760            // get a quadword aligned address within our stack space
5761            r_aligned16 = get_sp_aligned16( env );
5762            am_off0  = PPCAMode_IR( 0,  r_aligned16 );
5763            am_off8  = PPCAMode_IR( 8,  r_aligned16 );
5764
5765            /* Store 2*I64 to stack */
5766            if (IEndianess == Iend_LE) {
5767               addInstr(env, PPCInstr_Store( 8, am_off0, rLo, mode64 ));
5768               addInstr(env, PPCInstr_Store( 8, am_off8, rHi, mode64 ));
5769            } else {
5770               addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
5771               addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
5772            }
5773            /* Fetch result back from stack. */
5774            addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
5775
5776            add_to_sp( env, 32 );          // Reset SP
5777            return dst;
5778         }
5779      }
5780
5781      case Iop_Max32Fx4:   fpop = Pavfp_MAXF;   goto do_32Fx4;
5782      case Iop_Min32Fx4:   fpop = Pavfp_MINF;   goto do_32Fx4;
5783      case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
5784      case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
5785      case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
5786      do_32Fx4:
5787      {
5788         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5789         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
5790         HReg dst = newVRegV(env);
5791         addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
5792         return dst;
5793      }
5794
5795      case Iop_CmpLE32Fx4: {
5796         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5797         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
5798         HReg dst = newVRegV(env);
5799
5800         /* stay consistent with native ppc compares:
5801            if a left/right lane holds a nan, return zeros for that lane
5802            so: le == NOT(gt OR isNan)
5803          */
5804         HReg isNanLR = newVRegV(env);
5805         HReg isNanL = isNan(env, argL, IEndianess);
5806         HReg isNanR = isNan(env, argR, IEndianess);
5807         addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
5808                                         isNanL, isNanR));
5809
5810         addInstr(env, PPCInstr_AvBin32Fx4(Pavfp_CMPGTF, dst,
5811                                           argL, argR));
5812         addInstr(env, PPCInstr_AvBinary(Pav_OR, dst, dst, isNanLR));
5813         addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
5814         return dst;
5815      }
5816
5817      case Iop_AndV128:    op = Pav_AND;      goto do_AvBin;
5818      case Iop_OrV128:     op = Pav_OR;       goto do_AvBin;
5819      case Iop_XorV128:    op = Pav_XOR;      goto do_AvBin;
5820      do_AvBin: {
5821         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5822         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
5823         HReg dst  = newVRegV(env);
5824         addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
5825         return dst;
5826      }
5827
5828      case Iop_Shl8x16:    op = Pav_SHL;    goto do_AvBin8x16;
5829      case Iop_Shr8x16:    op = Pav_SHR;    goto do_AvBin8x16;
5830      case Iop_Sar8x16:    op = Pav_SAR;    goto do_AvBin8x16;
5831      case Iop_Rol8x16:    op = Pav_ROTL;   goto do_AvBin8x16;
5832      case Iop_InterleaveHI8x16: op = Pav_MRGHI;  goto do_AvBin8x16;
5833      case Iop_InterleaveLO8x16: op = Pav_MRGLO;  goto do_AvBin8x16;
5834      case Iop_Add8x16:    op = Pav_ADDU;   goto do_AvBin8x16;
5835      case Iop_QAdd8Ux16:  op = Pav_QADDU;  goto do_AvBin8x16;
5836      case Iop_QAdd8Sx16:  op = Pav_QADDS;  goto do_AvBin8x16;
5837      case Iop_Sub8x16:    op = Pav_SUBU;   goto do_AvBin8x16;
5838      case Iop_QSub8Ux16:  op = Pav_QSUBU;  goto do_AvBin8x16;
5839      case Iop_QSub8Sx16:  op = Pav_QSUBS;  goto do_AvBin8x16;
5840      case Iop_Avg8Ux16:   op = Pav_AVGU;   goto do_AvBin8x16;
5841      case Iop_Avg8Sx16:   op = Pav_AVGS;   goto do_AvBin8x16;
5842      case Iop_Max8Ux16:   op = Pav_MAXU;   goto do_AvBin8x16;
5843      case Iop_Max8Sx16:   op = Pav_MAXS;   goto do_AvBin8x16;
5844      case Iop_Min8Ux16:   op = Pav_MINU;   goto do_AvBin8x16;
5845      case Iop_Min8Sx16:   op = Pav_MINS;   goto do_AvBin8x16;
5846      case Iop_MullEven8Ux16: op = Pav_OMULU;  goto do_AvBin8x16;
5847      case Iop_MullEven8Sx16: op = Pav_OMULS;  goto do_AvBin8x16;
5848      case Iop_CmpEQ8x16:  op = Pav_CMPEQU; goto do_AvBin8x16;
5849      case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
5850      case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
5851      case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
5852      do_AvBin8x16: {
5853         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5854         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
5855         HReg dst  = newVRegV(env);
5856         addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
5857         return dst;
5858      }
5859
5860      case Iop_Shl16x8:    op = Pav_SHL;    goto do_AvBin16x8;
5861      case Iop_Shr16x8:    op = Pav_SHR;    goto do_AvBin16x8;
5862      case Iop_Sar16x8:    op = Pav_SAR;    goto do_AvBin16x8;
5863      case Iop_Rol16x8:    op = Pav_ROTL;   goto do_AvBin16x8;
5864      case Iop_NarrowBin16to8x16:    op = Pav_PACKUU;  goto do_AvBin16x8;
5865      case Iop_QNarrowBin16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
5866      case Iop_QNarrowBin16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
5867      case Iop_InterleaveHI16x8:  op = Pav_MRGHI;  goto do_AvBin16x8;
5868      case Iop_InterleaveLO16x8:  op = Pav_MRGLO;  goto do_AvBin16x8;
5869      case Iop_Add16x8:    op = Pav_ADDU;   goto do_AvBin16x8;
5870      case Iop_QAdd16Ux8:  op = Pav_QADDU;  goto do_AvBin16x8;
5871      case Iop_QAdd16Sx8:  op = Pav_QADDS;  goto do_AvBin16x8;
5872      case Iop_Sub16x8:    op = Pav_SUBU;   goto do_AvBin16x8;
5873      case Iop_QSub16Ux8:  op = Pav_QSUBU;  goto do_AvBin16x8;
5874      case Iop_QSub16Sx8:  op = Pav_QSUBS;  goto do_AvBin16x8;
5875      case Iop_Avg16Ux8:   op = Pav_AVGU;   goto do_AvBin16x8;
5876      case Iop_Avg16Sx8:   op = Pav_AVGS;   goto do_AvBin16x8;
5877      case Iop_Max16Ux8:   op = Pav_MAXU;   goto do_AvBin16x8;
5878      case Iop_Max16Sx8:   op = Pav_MAXS;   goto do_AvBin16x8;
5879      case Iop_Min16Ux8:   op = Pav_MINU;   goto do_AvBin16x8;
5880      case Iop_Min16Sx8:   op = Pav_MINS;   goto do_AvBin16x8;
5881      case Iop_MullEven16Ux8: op = Pav_OMULU;  goto do_AvBin16x8;
5882      case Iop_MullEven16Sx8: op = Pav_OMULS;  goto do_AvBin16x8;
5883      case Iop_CmpEQ16x8:  op = Pav_CMPEQU; goto do_AvBin16x8;
5884      case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
5885      case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
5886      case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
5887      do_AvBin16x8: {
5888         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5889         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
5890         HReg dst  = newVRegV(env);
5891         addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
5892         return dst;
5893      }
5894
5895      case Iop_Shl32x4:    op = Pav_SHL;    goto do_AvBin32x4;
5896      case Iop_Shr32x4:    op = Pav_SHR;    goto do_AvBin32x4;
5897      case Iop_Sar32x4:    op = Pav_SAR;    goto do_AvBin32x4;
5898      case Iop_Rol32x4:    op = Pav_ROTL;   goto do_AvBin32x4;
5899      case Iop_NarrowBin32to16x8:    op = Pav_PACKUU;  goto do_AvBin32x4;
5900      case Iop_QNarrowBin32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
5901      case Iop_QNarrowBin32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
5902      case Iop_InterleaveHI32x4:  op = Pav_MRGHI;  goto do_AvBin32x4;
5903      case Iop_InterleaveLO32x4:  op = Pav_MRGLO;  goto do_AvBin32x4;
5904      case Iop_Add32x4:    op = Pav_ADDU;   goto do_AvBin32x4;
5905      case Iop_QAdd32Ux4:  op = Pav_QADDU;  goto do_AvBin32x4;
5906      case Iop_QAdd32Sx4:  op = Pav_QADDS;  goto do_AvBin32x4;
5907      case Iop_Sub32x4:    op = Pav_SUBU;   goto do_AvBin32x4;
5908      case Iop_QSub32Ux4:  op = Pav_QSUBU;  goto do_AvBin32x4;
5909      case Iop_QSub32Sx4:  op = Pav_QSUBS;  goto do_AvBin32x4;
5910      case Iop_Avg32Ux4:   op = Pav_AVGU;   goto do_AvBin32x4;
5911      case Iop_Avg32Sx4:   op = Pav_AVGS;   goto do_AvBin32x4;
5912      case Iop_Max32Ux4:   op = Pav_MAXU;   goto do_AvBin32x4;
5913      case Iop_Max32Sx4:   op = Pav_MAXS;   goto do_AvBin32x4;
5914      case Iop_Min32Ux4:   op = Pav_MINU;   goto do_AvBin32x4;
5915      case Iop_Min32Sx4:   op = Pav_MINS;   goto do_AvBin32x4;
5916      case Iop_Mul32x4:    op = Pav_MULU;   goto do_AvBin32x4;
5917      case Iop_MullEven32Ux4: op = Pav_OMULU;  goto do_AvBin32x4;
5918      case Iop_MullEven32Sx4: op = Pav_OMULS;  goto do_AvBin32x4;
5919      case Iop_CmpEQ32x4:  op = Pav_CMPEQU; goto do_AvBin32x4;
5920      case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
5921      case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
5922      case Iop_CatOddLanes32x4:  op = Pav_CATODD;  goto do_AvBin32x4;
5923      case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
5924      case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
5925      do_AvBin32x4: {
5926         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5927         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
5928         HReg dst  = newVRegV(env);
5929         addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
5930         return dst;
5931      }
5932
5933      case Iop_Shl64x2:    op = Pav_SHL;    goto do_AvBin64x2;
5934      case Iop_Shr64x2:    op = Pav_SHR;    goto do_AvBin64x2;
5935      case Iop_Sar64x2:    op = Pav_SAR;    goto do_AvBin64x2;
5936      case Iop_Rol64x2:    op = Pav_ROTL;   goto do_AvBin64x2;
5937      case Iop_NarrowBin64to32x4:    op = Pav_PACKUU;  goto do_AvBin64x2;
5938      case Iop_QNarrowBin64Sto32Sx4: op = Pav_QPACKSS; goto do_AvBin64x2;
5939      case Iop_QNarrowBin64Uto32Ux4: op = Pav_QPACKUU; goto do_AvBin64x2;
5940      case Iop_InterleaveHI64x2:  op = Pav_MRGHI;  goto do_AvBin64x2;
5941      case Iop_InterleaveLO64x2:  op = Pav_MRGLO;  goto do_AvBin64x2;
5942      case Iop_Add64x2:    op = Pav_ADDU;   goto do_AvBin64x2;
5943      case Iop_Sub64x2:    op = Pav_SUBU;   goto do_AvBin64x2;
5944      case Iop_Max64Ux2:   op = Pav_MAXU;   goto do_AvBin64x2;
5945      case Iop_Max64Sx2:   op = Pav_MAXS;   goto do_AvBin64x2;
5946      case Iop_Min64Ux2:   op = Pav_MINU;   goto do_AvBin64x2;
5947      case Iop_Min64Sx2:   op = Pav_MINS;   goto do_AvBin64x2;
5948      case Iop_CmpEQ64x2:  op = Pav_CMPEQU; goto do_AvBin64x2;
5949      case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2;
5950      case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
5951      case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
5952      do_AvBin64x2: {
5953         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5954         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
5955         HReg dst  = newVRegV(env);
5956         addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
5957         return dst;
5958      }
5959
5960      case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
5961      case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
5962      do_AvShift8x16: {
5963         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5964         HReg dst    = newVRegV(env);
5965         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
5966         addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
5967         return dst;
5968      }
5969
5970      case Iop_ShlN16x8: op = Pav_SHL; goto do_AvShift16x8;
5971      case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
5972      case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
5973      do_AvShift16x8: {
5974         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5975         HReg dst    = newVRegV(env);
5976         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
5977         addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
5978         return dst;
5979      }
5980
5981      case Iop_ShlN32x4: op = Pav_SHL; goto do_AvShift32x4;
5982      case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
5983      case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
5984      do_AvShift32x4: {
5985         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5986         HReg dst    = newVRegV(env);
5987         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
5988         addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
5989         return dst;
5990      }
5991
5992      case Iop_ShlN64x2: op = Pav_SHL; goto do_AvShift64x2;
5993      case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2;
5994      case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2;
5995      do_AvShift64x2: {
5996         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
5997         HReg dst    = newVRegV(env);
5998         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
5999         addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft));
6000         return dst;
6001      }
6002
6003      case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
6004      case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
6005      do_AvShiftV128: {
6006         HReg dst    = newVRegV(env);
6007         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6008         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
6009         /* Note: shift value gets masked by 127 */
6010         addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
6011         return dst;
6012      }
6013
6014      case Iop_Perm8x16: {
6015         HReg dst   = newVRegV(env);
6016         HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6017         HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6018         addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
6019         return dst;
6020      }
6021
6022      case Iop_CipherV128:  op = Pav_CIPHERV128;   goto do_AvCipherV128;
6023      case Iop_CipherLV128: op = Pav_CIPHERLV128;  goto do_AvCipherV128;
6024      case Iop_NCipherV128: op = Pav_NCIPHERV128;  goto do_AvCipherV128;
6025      case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
6026      do_AvCipherV128: {
6027         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6028         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6029         HReg dst  = newVRegV(env);
6030         addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
6031         return dst;
6032      }
6033
6034      case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
6035      case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
6036      do_AvHashV128: {
6037         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6038         HReg dst  = newVRegV(env);
6039         PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6040         addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
6041         return dst;
6042      }
6043
6044      /* BCD Iops */
6045      case Iop_I128StoBCD128:
6046         {
6047            HReg dst = newVRegV(env);
6048            HReg arg = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6049            PPCRI* ps = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
6050
6051            addInstr(env, PPCInstr_AvBinaryInt( Pav_I128StoBCD128, dst, arg,
6052                                                ps ) );
6053            return dst;
6054         }
6055
6056      case Iop_MulI128by10E:       op = Pav_MulI128by10E;      goto do_MulI128E;
6057      case Iop_MulI128by10ECarry:  op = Pav_MulI128by10ECarry; goto do_MulI128E;
6058      do_MulI128E: {
6059            HReg dst  = newVRegV(env);
6060            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6061            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6062            addInstr(env, PPCInstr_AvBinary(op, dst, argL, argR));
6063            return dst;
6064         }
6065
6066      case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
6067      case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
6068      do_AvBCDV128: {
6069         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
6070         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
6071         HReg dst  = newVRegV(env);
6072         addInstr(env, PPCInstr_AvBCDV128Binary(op, dst, arg1, arg2));
6073         return dst;
6074      }
6075
6076      default:
6077         break;
6078      } /* switch (e->Iex.Binop.op) */
6079   } /* if (e->tag == Iex_Binop) */
6080
6081   if (e->tag == Iex_Triop) {
6082      IRTriop *triop = e->Iex.Triop.details;
6083      switch (triop->op) {
6084      case Iop_Add32Fx4: fpop = Pavfp_ADDF; goto do_32Fx4_with_rm;
6085      case Iop_Sub32Fx4: fpop = Pavfp_SUBF; goto do_32Fx4_with_rm;
6086      case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
6087      do_32Fx4_with_rm:
6088      {
6089         HReg argL = iselVecExpr(env, triop->arg2, IEndianess);
6090         HReg argR = iselVecExpr(env, triop->arg3, IEndianess);
6091         HReg dst  = newVRegV(env);
6092         /* FIXME: this is bogus, in the sense that Altivec ignores
6093            FPSCR.RM, at least for some FP operations.  So setting the
6094            RM is pointless.  This is only really correct in the case
6095            where the RM is known, at JIT time, to be Irrm_NEAREST,
6096            since -- at least for Altivec FP add/sub/mul -- the
6097            emitted insn is hardwired to round to nearest. */
6098         set_FPU_rounding_mode(env, triop->arg1, IEndianess);
6099         addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
6100         return dst;
6101      }
6102
6103      default:
6104         break;
6105      } /* switch (e->Iex.Triop.op) */
6106   } /* if (e->tag == Iex_Trinop) */
6107
6108
6109   if (e->tag == Iex_Const ) {
6110      vassert(e->Iex.Const.con->tag == Ico_V128);
6111      if (e->Iex.Const.con->Ico.V128 == 0x0000) {
6112         return generate_zeroes_V128(env);
6113      }
6114      else if (e->Iex.Const.con->Ico.V128 == 0xffff) {
6115         return generate_ones_V128(env);
6116      }
6117   }
6118
6119   vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
6120              LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
6121                                 env->hwcaps));
6122   ppIRExpr(e);
6123   vpanic("iselVecExpr_wrk(ppc)");
6124}
6125
6126
6127/*---------------------------------------------------------*/
6128/*--- ISEL: Statements                                  ---*/
6129/*---------------------------------------------------------*/
6130
6131static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
6132{
6133   Bool mode64 = env->mode64;
6134   if (vex_traceflags & VEX_TRACE_VCODE) {
6135      vex_printf("\n -- ");
6136      ppIRStmt(stmt);
6137      vex_printf("\n");
6138   }
6139
6140   switch (stmt->tag) {
6141
6142   /* --------- STORE --------- */
6143   case Ist_Store: {
6144      IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
6145      IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
6146      IREndness end   = stmt->Ist.Store.end;
6147
6148      if (end != IEndianess)
6149         goto stmt_fail;
6150      if (!mode64 && (tya != Ity_I32))
6151         goto stmt_fail;
6152      if (mode64 && (tya != Ity_I64))
6153         goto stmt_fail;
6154
6155      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
6156          (mode64 && (tyd == Ity_I64))) {
6157         PPCAMode* am_addr
6158            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6159                                 IEndianess);
6160         HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data, IEndianess);
6161         addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)),
6162                                       am_addr, r_src, mode64 ));
6163         return;
6164      }
6165      if (tyd == Ity_F64) {
6166         PPCAMode* am_addr
6167            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6168                                 IEndianess);
6169         HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data, IEndianess);
6170         addInstr(env,
6171                  PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6172         return;
6173      }
6174      if (tyd == Ity_F32) {
6175         PPCAMode* am_addr
6176            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6177                                 IEndianess);
6178         HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data, IEndianess);
6179         addInstr(env,
6180                  PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6181         return;
6182      }
6183      if (tyd == Ity_D64) {
6184         PPCAMode* am_addr
6185            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6186                                 IEndianess);
6187         HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data, IEndianess);
6188         addInstr(env,
6189                  PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
6190         return;
6191      }
6192      if (tyd == Ity_D32) {
6193         PPCAMode* am_addr
6194            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6195                                 IEndianess);
6196         HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data, IEndianess);
6197         addInstr(env,
6198                  PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
6199         return;
6200      }
6201      if (tyd == Ity_V128) {
6202         PPCAMode* am_addr
6203            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
6204                                 IEndianess);
6205         HReg v_src = iselVecExpr(env, stmt->Ist.Store.data, IEndianess);
6206         addInstr(env,
6207                  PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6208         return;
6209      }
6210      if (tyd == Ity_I64 && !mode64) {
6211         /* Just calculate the address in the register.  Life is too
6212            short to arse around trying and possibly failing to adjust
6213            the offset in a 'reg+offset' style amode. */
6214         HReg rHi32, rLo32;
6215         HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr, IEndianess);
6216         iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data,
6217                        IEndianess );
6218         addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6219                                       PPCAMode_IR( 0, r_addr ),
6220                                       rHi32,
6221                                       False/*32-bit insn please*/) );
6222         addInstr(env, PPCInstr_Store( 4/*byte-store*/,
6223                                       PPCAMode_IR( 4, r_addr ),
6224                                       rLo32,
6225                                       False/*32-bit insn please*/) );
6226         return;
6227      }
6228      break;
6229   }
6230
6231   /* --------- PUT --------- */
6232   case Ist_Put: {
6233      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6234      if (ty == Ity_I8  || ty == Ity_I16 ||
6235          ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6236         HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data, IEndianess);
6237         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6238                                          GuestStatePtr(mode64) );
6239         addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)),
6240                                       am_addr, r_src, mode64 ));
6241         return;
6242      }
6243      if (!mode64 && ty == Ity_I64) {
6244         HReg rHi, rLo;
6245         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6246                                           GuestStatePtr(mode64) );
6247         PPCAMode* am_addr4 = advance4(env, am_addr);
6248         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6249         addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
6250         addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6251         return;
6252      }
6253      if (ty == Ity_I128) {
6254         HReg rHi, rLo;
6255         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6256                                           GuestStatePtr(mode64) );
6257         PPCAMode* am_addr4 = advance4(env, am_addr);
6258
6259         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
6260         addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
6261         addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
6262         return;
6263      }
6264      if (ty == Ity_F128) {
6265         /* Guest state vectors are 16byte aligned,
6266            so don't need to worry here */
6267         HReg v_src = iselFp128Expr(env, stmt->Ist.Put.data, IEndianess);
6268
6269         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6270                                           GuestStatePtr(mode64) );
6271         addInstr(env,
6272                  PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6273         return;
6274      }
6275      if (ty == Ity_V128) {
6276         /* Guest state vectors are 16byte aligned,
6277            so don't need to worry here */
6278         HReg v_src = iselVecExpr(env, stmt->Ist.Put.data, IEndianess);
6279         PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
6280                                           GuestStatePtr(mode64) );
6281         addInstr(env,
6282                  PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
6283         return;
6284      }
6285      if (ty == Ity_F64) {
6286         HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data, IEndianess);
6287         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6288                                          GuestStatePtr(mode64) );
6289         addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
6290                                        fr_src, am_addr ));
6291         return;
6292      }
6293      if (ty == Ity_D32) {
6294         /* The 32-bit value is stored in a 64-bit register */
6295         HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data, IEndianess );
6296         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6297                                          GuestStatePtr(mode64) );
6298         addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8,
6299                                         fr_src, am_addr ) );
6300         return;
6301      }
6302      if (ty == Ity_D64) {
6303         HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data, IEndianess );
6304         PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
6305                                          GuestStatePtr(mode64) );
6306         addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8, fr_src, am_addr ) );
6307         return;
6308      }
6309      break;
6310   }
6311
6312   /* --------- Indexed PUT --------- */
6313   case Ist_PutI: {
6314      IRPutI *puti = stmt->Ist.PutI.details;
6315
6316      PPCAMode* dst_am
6317         = genGuestArrayOffset(
6318              env, puti->descr,
6319              puti->ix, puti->bias,
6320              IEndianess );
6321      IRType ty = typeOfIRExpr(env->type_env, puti->data);
6322      if (mode64 && ty == Ity_I64) {
6323         HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6324         addInstr(env, PPCInstr_Store( toUChar(8),
6325                                       dst_am, r_src, mode64 ));
6326         return;
6327      }
6328      if ((!mode64) && ty == Ity_I32) {
6329         HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
6330         addInstr(env, PPCInstr_Store( toUChar(4),
6331                                       dst_am, r_src, mode64 ));
6332         return;
6333      }
6334      break;
6335   }
6336
6337   /* --------- TMP --------- */
6338   case Ist_WrTmp: {
6339      IRTemp tmp = stmt->Ist.WrTmp.tmp;
6340      IRType ty = typeOfIRTemp(env->type_env, tmp);
6341      if (ty == Ity_I8  || ty == Ity_I16 ||
6342          ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
6343         HReg r_dst = lookupIRTemp(env, tmp);
6344         HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data, IEndianess);
6345         addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
6346         return;
6347      }
6348      if (!mode64 && ty == Ity_I64) {
6349         HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6350
6351         iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6352                       IEndianess);
6353         lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6354         addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6355         addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6356         return;
6357      }
6358      if (mode64 && ty == Ity_I128) {
6359         HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
6360         iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
6361                        IEndianess);
6362         lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
6363         addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
6364         addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
6365         return;
6366      }
6367      if (!mode64 && ty == Ity_I128) {
6368         HReg r_srcHi, r_srcMedHi, r_srcMedLo, r_srcLo;
6369         HReg r_dstHi, r_dstMedHi, r_dstMedLo, r_dstLo;
6370
6371         iselInt128Expr_to_32x4(&r_srcHi, &r_srcMedHi,
6372                                &r_srcMedLo, &r_srcLo,
6373                                env, stmt->Ist.WrTmp.data, IEndianess);
6374
6375         lookupIRTempQuad( &r_dstHi, &r_dstMedHi, &r_dstMedLo,
6376                           &r_dstLo, env, tmp);
6377
6378         addInstr(env, mk_iMOVds_RR(r_dstHi,    r_srcHi) );
6379         addInstr(env, mk_iMOVds_RR(r_dstMedHi, r_srcMedHi) );
6380         addInstr(env, mk_iMOVds_RR(r_dstMedLo, r_srcMedLo) );
6381         addInstr(env, mk_iMOVds_RR(r_dstLo,    r_srcLo) );
6382         return;
6383      }
6384      if (ty == Ity_I1) {
6385         PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data,
6386                                         IEndianess);
6387         HReg r_dst = lookupIRTemp(env, tmp);
6388         addInstr(env, PPCInstr_Set(cond, r_dst));
6389         return;
6390      }
6391      if (ty == Ity_F64) {
6392         HReg fr_dst = lookupIRTemp(env, tmp);
6393         HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6394         addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6395         return;
6396      }
6397      if (ty == Ity_F32) {
6398         HReg fr_dst = lookupIRTemp(env, tmp);
6399         HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6400         addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
6401         return;
6402      }
6403      if (ty == Ity_D32) {
6404         HReg fr_dst = lookupIRTemp(env, tmp);
6405         HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6406         addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
6407         return;
6408      }
6409      if (ty == Ity_F128) {
6410         HReg v_dst = lookupIRTemp(env, tmp);
6411         HReg v_src = iselFp128Expr(env, stmt->Ist.WrTmp.data, IEndianess);
6412         addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6413         return;
6414      }
6415      if (ty == Ity_V128) {
6416         HReg v_dst = lookupIRTemp(env, tmp);
6417         HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data, IEndianess);
6418         addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
6419         return;
6420      }
6421      if (ty == Ity_D64) {
6422         HReg fr_dst = lookupIRTemp( env, tmp );
6423         HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data, IEndianess );
6424         addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dst, fr_src ) );
6425         return;
6426      }
6427      if (ty == Ity_D128) {
6428         HReg fr_srcHi, fr_srcLo, fr_dstHi, fr_dstLo;
6429	 //         lookupDfp128IRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6430         lookupIRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
6431         iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data,
6432                         IEndianess );
6433         addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstHi, fr_srcHi ) );
6434         addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstLo, fr_srcLo ) );
6435         return;
6436      }
6437      break;
6438   }
6439
6440   /* --------- Load Linked or Store Conditional --------- */
6441   case Ist_LLSC: {
6442      IRTemp res    = stmt->Ist.LLSC.result;
6443      IRType tyRes  = typeOfIRTemp(env->type_env, res);
6444      IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
6445
6446      if (stmt->Ist.LLSC.end != IEndianess)
6447         goto stmt_fail;
6448      if (!mode64 && (tyAddr != Ity_I32))
6449         goto stmt_fail;
6450      if (mode64 && (tyAddr != Ity_I64))
6451         goto stmt_fail;
6452
6453      if (stmt->Ist.LLSC.storedata == NULL) {
6454         /* LL */
6455         HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
6456         HReg r_dst  = lookupIRTemp(env, res);
6457         if (tyRes == Ity_I8) {
6458            addInstr(env, PPCInstr_LoadL( 1, r_dst, r_addr, mode64 ));
6459            return;
6460         }
6461         if (tyRes == Ity_I16) {
6462            addInstr(env, PPCInstr_LoadL( 2, r_dst, r_addr, mode64 ));
6463            return;
6464         }
6465         if (tyRes == Ity_I32) {
6466            addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
6467            return;
6468         }
6469         if (tyRes == Ity_I64 && mode64) {
6470            addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
6471            return;
6472         }
6473         /* fallthru */;
6474      } else {
6475         /* SC */
6476         HReg   r_res  = lookupIRTemp(env, res); /* :: Ity_I1 */
6477         HReg   r_a    = iselWordExpr_R(env, stmt->Ist.LLSC.addr, IEndianess);
6478         HReg   r_src  = iselWordExpr_R(env, stmt->Ist.LLSC.storedata,
6479                                        IEndianess);
6480         HReg   r_tmp  = newVRegI(env);
6481         IRType tyData = typeOfIRExpr(env->type_env,
6482                                      stmt->Ist.LLSC.storedata);
6483         vassert(tyRes == Ity_I1);
6484         if (tyData == Ity_I8 || tyData == Ity_I16 || tyData == Ity_I32 ||
6485            (tyData == Ity_I64 && mode64)) {
6486            int size = 0;
6487
6488            if (tyData == Ity_I64)
6489               size = 8;
6490            else if (tyData == Ity_I32)
6491               size = 4;
6492            else if (tyData == Ity_I16)
6493               size = 2;
6494            else if (tyData == Ity_I8)
6495               size = 1;
6496
6497            addInstr(env, PPCInstr_StoreC( size,
6498                                           r_a, r_src, mode64 ));
6499            addInstr(env, PPCInstr_MfCR( r_tmp ));
6500            addInstr(env, PPCInstr_Shft(
6501                             Pshft_SHR,
6502                             env->mode64 ? False : True
6503                                /*F:64-bit, T:32-bit shift*/,
6504                             r_tmp, r_tmp,
6505                             PPCRH_Imm(False/*unsigned*/, 29)));
6506            /* Probably unnecessary, since the IR dest type is Ity_I1,
6507               and so we are entitled to leave whatever junk we like
6508               drifting round in the upper 31 or 63 bits of r_res.
6509               However, for the sake of conservativeness .. */
6510            addInstr(env, PPCInstr_Alu(
6511                             Palu_AND,
6512                             r_res, r_tmp,
6513                             PPCRH_Imm(False/*signed*/, 1)));
6514            return;
6515         }
6516         /* fallthru */
6517      }
6518      goto stmt_fail;
6519      /*NOTREACHED*/
6520   }
6521
6522   /* --------- Call to DIRTY helper --------- */
6523   case Ist_Dirty: {
6524      IRDirty* d = stmt->Ist.Dirty.details;
6525
6526      /* Figure out the return type, if any. */
6527      IRType retty = Ity_INVALID;
6528      if (d->tmp != IRTemp_INVALID)
6529         retty = typeOfIRTemp(env->type_env, d->tmp);
6530
6531      /* Throw out any return types we don't know about.  The set of
6532         acceptable return types is the same in both 32- and 64-bit
6533         mode, so we don't need to inspect mode64 to make a
6534         decision. */
6535      Bool retty_ok = False;
6536      switch (retty) {
6537         case Ity_INVALID: /* function doesn't return anything */
6538         case Ity_V128:
6539         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6540            retty_ok = True; break;
6541         default:
6542            break;
6543      }
6544      if (!retty_ok)
6545         break; /* will go to stmt_fail: */
6546
6547      /* Marshal args, do the call, clear stack, set the return value
6548         to 0x555..555 if this is a conditional call that returns a
6549         value and the call is skipped. */
6550      UInt   addToSp = 0;
6551      RetLoc rloc    = mk_RetLoc_INVALID();
6552      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args,
6553                    IEndianess );
6554      vassert(is_sane_RetLoc(rloc));
6555
6556      /* Now figure out what to do with the returned value, if any. */
6557      switch (retty) {
6558         case Ity_INVALID: {
6559            /* No return value.  Nothing to do. */
6560            vassert(d->tmp == IRTemp_INVALID);
6561            vassert(rloc.pri == RLPri_None);
6562            vassert(addToSp == 0);
6563            return;
6564         }
6565         case Ity_I32: case Ity_I16: case Ity_I8: {
6566            /* The returned value is in %r3.  Park it in the register
6567               associated with tmp. */
6568            HReg r_dst = lookupIRTemp(env, d->tmp);
6569            addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
6570            vassert(rloc.pri == RLPri_Int);
6571            vassert(addToSp == 0);
6572            return;
6573         }
6574         case Ity_I64:
6575            if (mode64) {
6576               /* The returned value is in %r3.  Park it in the register
6577                  associated with tmp. */
6578               HReg r_dst = lookupIRTemp(env, d->tmp);
6579               addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
6580               vassert(rloc.pri == RLPri_Int);
6581               vassert(addToSp == 0);
6582            } else {
6583               /* The returned value is in %r3:%r4.  Park it in the
6584                  register-pair associated with tmp. */
6585               HReg r_dstHi = INVALID_HREG;
6586               HReg r_dstLo = INVALID_HREG;
6587               lookupIRTempPair( &r_dstHi, &r_dstLo, env, d->tmp);
6588               addInstr(env, mk_iMOVds_RR(r_dstHi, hregPPC_GPR3(mode64)));
6589               addInstr(env, mk_iMOVds_RR(r_dstLo, hregPPC_GPR4(mode64)));
6590               vassert(rloc.pri == RLPri_2Int);
6591               vassert(addToSp == 0);
6592            }
6593            return;
6594         case Ity_V128: {
6595            /* The returned value is on the stack, and *retloc tells
6596               us where.  Fish it off the stack and then move the
6597               stack pointer upwards to clear it, as directed by
6598               doHelperCall. */
6599            vassert(rloc.pri == RLPri_V128SpRel);
6600            vassert(addToSp >= 16);
6601            HReg      dst = lookupIRTemp(env, d->tmp);
6602            PPCAMode* am  = PPCAMode_IR(rloc.spOff, StackFramePtr(mode64));
6603            addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, dst, am ));
6604            add_to_sp(env, addToSp);
6605            return;
6606         }
6607         default:
6608            /*NOTREACHED*/
6609            vassert(0);
6610      }
6611   }
6612
6613   /* --------- MEM FENCE --------- */
6614   case Ist_MBE:
6615      switch (stmt->Ist.MBE.event) {
6616         case Imbe_Fence:
6617            addInstr(env, PPCInstr_MFence());
6618            return;
6619         default:
6620            break;
6621      }
6622      break;
6623
6624   /* --------- INSTR MARK --------- */
6625   /* Doesn't generate any executable code ... */
6626   case Ist_IMark:
6627       return;
6628
6629   /* --------- ABI HINT --------- */
6630   /* These have no meaning (denotation in the IR) and so we ignore
6631      them ... if any actually made it this far. */
6632   case Ist_AbiHint:
6633       return;
6634
6635   /* --------- NO-OP --------- */
6636   /* Fairly self-explanatory, wouldn't you say? */
6637   case Ist_NoOp:
6638       return;
6639
6640   /* --------- EXIT --------- */
6641   case Ist_Exit: {
6642      IRConst* dst = stmt->Ist.Exit.dst;
6643      if (!mode64 && dst->tag != Ico_U32)
6644         vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
6645      if (mode64 && dst->tag != Ico_U64)
6646         vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
6647
6648      PPCCondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard, IEndianess);
6649      PPCAMode*   amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
6650                                      hregPPC_GPR31(mode64));
6651
6652      /* Case: boring transfer to known address */
6653      if (stmt->Ist.Exit.jk == Ijk_Boring
6654          || stmt->Ist.Exit.jk == Ijk_Call
6655          /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
6656         if (env->chainingAllowed) {
6657            /* .. almost always true .. */
6658            /* Skip the event check at the dst if this is a forwards
6659               edge. */
6660            Bool toFastEP
6661               = mode64
6662               ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
6663               : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
6664            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6665            addInstr(env, PPCInstr_XDirect(
6666                             mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
6667                                    : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
6668                             amCIA, cc, toFastEP));
6669         } else {
6670            /* .. very occasionally .. */
6671            /* We can't use chaining, so ask for an assisted transfer,
6672               as that's the only alternative that is allowable. */
6673            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
6674                                    IEndianess);
6675            addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
6676         }
6677         return;
6678      }
6679
6680      /* Case: assisted transfer to arbitrary address */
6681      switch (stmt->Ist.Exit.jk) {
6682         /* Keep this list in sync with that in iselNext below */
6683         case Ijk_ClientReq:
6684         case Ijk_EmFail:
6685         case Ijk_EmWarn:
6686         case Ijk_NoDecode:
6687         case Ijk_NoRedir:
6688         case Ijk_SigBUS:
6689         case Ijk_SigTRAP:
6690         case Ijk_Sys_syscall:
6691         case Ijk_InvalICache:
6692         {
6693            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
6694                                    IEndianess);
6695            addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
6696                                             stmt->Ist.Exit.jk));
6697            return;
6698         }
6699         default:
6700            break;
6701      }
6702
6703      /* Do we ever expect to see any other kind? */
6704      goto stmt_fail;
6705   }
6706
6707   default: break;
6708   }
6709  stmt_fail:
6710   ppIRStmt(stmt);
6711   vpanic("iselStmt(ppc)");
6712}
6713
6714
6715/*---------------------------------------------------------*/
6716/*--- ISEL: Basic block terminators (Nexts)             ---*/
6717/*---------------------------------------------------------*/
6718
6719static void iselNext ( ISelEnv* env,
6720                       IRExpr* next, IRJumpKind jk, Int offsIP,
6721                       IREndness IEndianess)
6722{
6723   if (vex_traceflags & VEX_TRACE_VCODE) {
6724      vex_printf( "\n-- PUT(%d) = ", offsIP);
6725      ppIRExpr( next );
6726      vex_printf( "; exit-");
6727      ppIRJumpKind(jk);
6728      vex_printf( "\n");
6729   }
6730
6731   PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
6732
6733   /* Case: boring transfer to known address */
6734   if (next->tag == Iex_Const) {
6735      IRConst* cdst = next->Iex.Const.con;
6736      vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32));
6737      if (jk == Ijk_Boring || jk == Ijk_Call) {
6738         /* Boring transfer to known address */
6739         PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
6740         if (env->chainingAllowed) {
6741            /* .. almost always true .. */
6742            /* Skip the event check at the dst if this is a forwards
6743               edge. */
6744            Bool toFastEP
6745               = env->mode64
6746               ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
6747               : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
6748            if (0) vex_printf("%s", toFastEP ? "X" : ".");
6749            addInstr(env, PPCInstr_XDirect(
6750                             env->mode64 ? (Addr64)cdst->Ico.U64
6751                                         : (Addr64)cdst->Ico.U32,
6752                             amCIA, always, toFastEP));
6753         } else {
6754            /* .. very occasionally .. */
6755            /* We can't use chaining, so ask for an assisted transfer,
6756               as that's the only alternative that is allowable. */
6757            HReg r = iselWordExpr_R(env, next, IEndianess);
6758            addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
6759                                             Ijk_Boring));
6760         }
6761         return;
6762      }
6763   }
6764
6765   /* Case: call/return (==boring) transfer to any address */
6766   switch (jk) {
6767      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6768         HReg       r     = iselWordExpr_R(env, next, IEndianess);
6769         PPCAMode*  amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
6770         if (env->chainingAllowed) {
6771            addInstr(env, PPCInstr_XIndir(r, amCIA, always));
6772         } else {
6773            addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
6774                                             Ijk_Boring));
6775         }
6776         return;
6777      }
6778      default:
6779         break;
6780   }
6781
6782   /* Case: assisted transfer to arbitrary address */
6783   switch (jk) {
6784      /* Keep this list in sync with that for Ist_Exit above */
6785      case Ijk_ClientReq:
6786      case Ijk_EmFail:
6787      case Ijk_EmWarn:
6788      case Ijk_NoDecode:
6789      case Ijk_NoRedir:
6790      case Ijk_SigBUS:
6791      case Ijk_SigTRAP:
6792      case Ijk_Sys_syscall:
6793      case Ijk_InvalICache:
6794      {
6795         HReg      r     = iselWordExpr_R(env, next, IEndianess);
6796         PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
6797         addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
6798         return;
6799      }
6800      default:
6801         break;
6802   }
6803
6804   vex_printf( "\n-- PUT(%d) = ", offsIP);
6805   ppIRExpr( next );
6806   vex_printf( "; exit-");
6807   ppIRJumpKind(jk);
6808   vex_printf( "\n");
6809   vassert(0); // are we expecting any other kind?
6810}
6811
6812
6813/*---------------------------------------------------------*/
6814/*--- Insn selector top-level                           ---*/
6815/*---------------------------------------------------------*/
6816
6817/* Translate an entire SB to ppc code. */
6818HInstrArray* iselSB_PPC ( const IRSB* bb,
6819                          VexArch      arch_host,
6820                          const VexArchInfo* archinfo_host,
6821                          const VexAbiInfo*  vbi,
6822                          Int offs_Host_EvC_Counter,
6823                          Int offs_Host_EvC_FailAddr,
6824                          Bool chainingAllowed,
6825                          Bool addProfInc,
6826                          Addr max_ga)
6827
6828{
6829   Int       i, j;
6830   HReg      hregLo, hregMedLo, hregMedHi, hregHi;
6831   ISelEnv*  env;
6832   UInt      hwcaps_host = archinfo_host->hwcaps;
6833   Bool      mode64 = False;
6834   UInt      mask32, mask64;
6835   PPCAMode *amCounter, *amFailAddr;
6836   IREndness IEndianess;
6837
6838   vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
6839   mode64 = arch_host == VexArchPPC64;
6840
6841   /* do some sanity checks,
6842    * Note: no 32-bit support for ISA 3.0
6843    */
6844   mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
6845            | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX
6846            | VEX_HWCAPS_PPC32_DFP | VEX_HWCAPS_PPC32_ISA2_07;
6847
6848   mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
6849            | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX | VEX_HWCAPS_PPC64_DFP
6850            | VEX_HWCAPS_PPC64_ISA2_07 | VEX_HWCAPS_PPC64_ISA3_0;
6851
6852   if (mode64) {
6853      vassert((hwcaps_host & mask32) == 0);
6854   } else {
6855      vassert((hwcaps_host & mask64) == 0);
6856   }
6857
6858   /* Check that the host's endianness is as expected. */
6859   vassert((archinfo_host->endness == VexEndnessBE) ||
6860	   (archinfo_host->endness == VexEndnessLE));
6861
6862   if (archinfo_host->endness == VexEndnessBE)
6863     IEndianess = Iend_BE;
6864   else
6865     IEndianess = Iend_LE;
6866
6867   /* Make up an initial environment to use. */
6868   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
6869   env->vreg_ctr = 0;
6870
6871   /* Are we being ppc32 or ppc64? */
6872   env->mode64 = mode64;
6873
6874   /* Set up output code array. */
6875   env->code = newHInstrArray();
6876
6877   /* Copy BB's type env. */
6878   env->type_env = bb->tyenv;
6879
6880   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6881    * change as we go along.
6882    *
6883    * vregmap2 and vregmap3 are only used in 32 bit mode
6884    * for supporting I128 in 32-bit mode
6885    */
6886   env->n_vregmap = bb->tyenv->types_used;
6887   env->vregmapLo    = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6888   env->vregmapMedLo = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6889   if (mode64) {
6890      env->vregmapMedHi = NULL;
6891      env->vregmapHi    = NULL;
6892   } else {
6893      env->vregmapMedHi = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6894      env->vregmapHi    = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6895   }
6896
6897   /* and finally ... */
6898   env->chainingAllowed = chainingAllowed;
6899   env->max_ga          = max_ga;
6900   env->hwcaps          = hwcaps_host;
6901   env->previous_rm     = NULL;
6902   env->vbi             = vbi;
6903
6904   /* For each IR temporary, allocate a suitably-kinded virtual
6905      register. */
6906   j = 0;
6907   for (i = 0; i < env->n_vregmap; i++) {
6908      hregLo = hregMedLo = hregMedHi = hregHi = INVALID_HREG;
6909      switch (bb->tyenv->types[i]) {
6910      case Ity_I1:
6911      case Ity_I8:
6912      case Ity_I16:
6913      case Ity_I32:
6914         if (mode64) {
6915            hregLo = mkHReg(True, HRcInt64, 0, j++);
6916         } else {
6917            hregLo = mkHReg(True, HRcInt32, 0, j++);
6918         }
6919         break;
6920      case Ity_I64:
6921         if (mode64) {
6922            hregLo    = mkHReg(True, HRcInt64, 0, j++);
6923         } else {
6924            hregLo    = mkHReg(True, HRcInt32, 0, j++);
6925            hregMedLo = mkHReg(True, HRcInt32, 0, j++);
6926         }
6927         break;
6928      case Ity_I128:
6929         if (mode64) {
6930            hregLo    = mkHReg(True, HRcInt64, 0, j++);
6931            hregMedLo = mkHReg(True, HRcInt64, 0, j++);
6932         } else {
6933            hregLo    = mkHReg(True, HRcInt32, 0, j++);
6934            hregMedLo = mkHReg(True, HRcInt32, 0, j++);
6935            hregMedHi = mkHReg(True, HRcInt32, 0, j++);
6936            hregHi    = mkHReg(True, HRcInt32, 0, j++);
6937         }
6938         break;
6939      case Ity_F32:
6940      case Ity_F64:
6941         hregLo = mkHReg(True, HRcFlt64, 0, j++);
6942         break;
6943      case Ity_F128:
6944      case Ity_V128:
6945         hregLo = mkHReg(True, HRcVec128, 0, j++);
6946         break;
6947      case Ity_D32:
6948      case Ity_D64:
6949         hregLo = mkHReg(True, HRcFlt64, 0, j++);
6950         break;
6951      case Ity_D128:
6952         hregLo    = mkHReg(True, HRcFlt64, 0, j++);
6953         hregMedLo = mkHReg(True, HRcFlt64, 0, j++);
6954         break;
6955      default:
6956         ppIRType(bb->tyenv->types[i]);
6957         vpanic("iselBB(ppc): IRTemp type");
6958      }
6959      env->vregmapLo[i]    = hregLo;
6960      env->vregmapMedLo[i] = hregMedLo;
6961      if (!mode64) {
6962         env->vregmapMedHi[i] = hregMedHi;
6963         env->vregmapHi[i]    = hregHi;
6964      }
6965   }
6966   env->vreg_ctr = j;
6967
6968   /* The very first instruction must be an event check. */
6969   amCounter  = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
6970   amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
6971   addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
6972
6973   /* Possibly a block counter increment (for profiling).  At this
6974      point we don't know the address of the counter, so just pretend
6975      it is zero.  It will have to be patched later, but before this
6976      translation is used, by a call to LibVEX_patchProfCtr. */
6977   if (addProfInc) {
6978      addInstr(env, PPCInstr_ProfInc());
6979   }
6980
6981   /* Ok, finally we can iterate over the statements. */
6982   for (i = 0; i < bb->stmts_used; i++)
6983      iselStmt(env, bb->stmts[i], IEndianess);
6984
6985   iselNext(env, bb->next, bb->jumpkind, bb->offsIP, IEndianess);
6986
6987   /* record the number of vregs we used. */
6988   env->code->n_vregs = env->vreg_ctr;
6989   return env->code;
6990}
6991
6992
6993/*---------------------------------------------------------------*/
6994/*--- end                                     host_ppc_isel.c ---*/
6995/*---------------------------------------------------------------*/
6996