/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0

/* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
#define STATS_IROPT 0

/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * If iropt_register_updates == VexRegUpdSpAtMemAccess:
     The guest state is only up to date as explained above
     (i.e. at SB exits and as specified by dirty helper calls).
     Also, the stack pointer register is up to date at memory
     exception points (as this is needed for the stack extension
     logic in m_signals.c).

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess:
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     Same as the minimal scheme, but all the guest state is up to
     date at memory exception points.

   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn:
     Guest state is up to date at each instruction.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/
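
/* As an illustrative sketch only: the "cheap" sequence above amounts
   to driving the passes defined in this file roughly as follows.
   Only flatten_BB, redundant_get_removal_BB and
   redundant_put_removal_BB appear in this part of the file; the
   driver name and the remaining steps are hypothetical placeholders
   for passes defined further below.

      IRSB* example_cheap_pipeline ( IRSB* bb )   // hypothetical
      {
         bb = flatten_BB(bb);
         redundant_get_removal_BB(bb);
         redundant_put_removal_BB(bb, preciseMemExnsFn, pxControl);
         // ... then constant propagation/folding, dead code removal,
         // clean-helper specialisation, and dead code removal again.
         return bb;
      }
*/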

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong: PutIs are assumed not to
   write parts of the guest state that we need to have up-to-date at
   loads/stores.  So far on the x86 guest that has not mattered,
   since indeed only the x87 FP registers and tags are accessed using
   GetI/PutI, and there is no need so far for them to be up to date
   at mem exception points.  The rPutI pass should be fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   TODO: CSE up F64 literals (already doing F64is).

   TODO: CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive.  */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   done by hashing, but it's not clear whether or not this would
   really be any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
   h->size   = 8;
   h->used   = 0;
   h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
   h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j] = h->key[i];
         val2[j] = h->val[i];
         j++;
      }
      h->used = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key = key2;
      h->val = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used] = key;
   h->val[h->used] = val;
   h->used++;
}
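
/* Illustrative sketch (not part of the pass machinery): how the
   HashHW API above is typically used.  Keys and values are plain
   HWords; the caller picks the encoding.  Excluded from compilation
   since nothing calls it. */
#if 0
static void example_HashHW_usage ( void )
{
   HashHW* h = newHHW();
   HWord   v = 0;
   addToHHW(h, (HWord)0x10, (HWord)0xAA);  /* bind 0x10 :-> 0xAA */
   addToHHW(h, (HWord)0x10, (HWord)0xBB);  /* rebind 0x10 :-> 0xBB */
   vassert(lookupHHW(h, &v, (HWord)0x10) && v == 0xBB);
   vassert(!lookupHHW(h, NULL, (HWord)0x20)); /* unbound key */
}
#endif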


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}
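
/* For example, by the test above 'Add32(t1,t2)' and 'LDle:I32(t3)'
   are flat, whereas 'Add32(t1,Add32(t2,t3))' is not, since its second
   argument is itself a Binop rather than an atom (an IRTemp or
   IRConst). */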

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}


/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1, e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}
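
/* Example of the overall effect: for the non-flat statement

      t5 = Add32(GET:I32(8),Shl32(t1,0x3:I8))

   flatten_Stmt emits the flat sequence

      ta = GET:I32(8)
      tb = Shl32(t1,0x3:I8)
      tc = Add32(ta,tb)
      t5 = tc

   (ta, tb, tc fresh temps; exact numbering depends on allocation
   order), so that every argument position holds an atom.  The final
   tmp-tmp copy is the price of the general scheme; the isFlat test
   above exists precisely to skip it when the RHS is already flat. */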


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding.  */
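
/* For example, in

      t1 = GET:I32(0)
      t2 = GET:I32(0)

   the second Get repeats the first, so it is rewritten as 't2 = t1'.
   An intervening 'PUT(0) = t3' would instead rebind (0,3) to t3, so
   a subsequent 't2 = GET:I32(0)' would become 't2 = t3'. */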

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
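
/* For example, a 4-byte (I32) access at offset 24 gives minoff == 24
   and maxoff == 27, hence the key (24 << 16) | 27 == 0x18001B. */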

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}
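
/* For example, with an entry covering (8 .. 11) in h: the range
   (10 .. 13) overlaps it (neither 11 < 10 nor 13 < 8 holds) and so
   invalidates the entry, whereas (12 .. 15) leaves it alone. */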


static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all
         parts of the environment corresponding to guest state that may
         not be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify that only the SP is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/
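
/* For example, when scanning backwards through

      PUT(60) = t1
      t2 = GET:I32(0)
      PUT(60) = t3

   the final Put enters (60,63) into the set; the Get only removes
   entries overlapping (0,3); so on reaching the first Put, (60,63)
   is still in the set and that Put is replaced by a NoOp.  Had the
   Get instead read offset 60, it would have removed (60,63) and both
   Puts would survive. */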

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or whatever it claims to write; we don't
      care). */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30


/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value? The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do. The reason is that we do not
   keep track of changes in the guest state and memory. Thus, two
   Gets, GetIs or Loads, even when accessing the same location, will be
   assumed to compute different values. After all, the accesses may
   happen at different times and the guest state / memory can have
   changed in the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
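
/* For example, given the bindings env[t1] = Add32(t9,0x4:I32) and
   env[t2] = Add32(t9,0x4:I32), comparing RdTmp:t1 against RdTmp:t2
   succeeds: the temps differ, but recursing through their bindings
   shows both compute the same value.  By contrast, two syntactically
   identical GET:I32(0) expressions compare as not-same, for the
   reason given above. */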

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);
}

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}

inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False;  /* reset */
#endif /* STATS_IROPT */

   return same;
}


/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   vex_control.iropt_verbosity > 0, that is, with
   --vex-iropt-verbosity > 0.  Bad because it duplicates
   functionality from typeOfIRExpr.  See comment on the single use
   point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   switch (e1->tag) {
      case Iex_Const: {
         /* The only interesting case */
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         return c1->tag != c2->tag;
      }
      default:
         break;
   }
   return False;
}


/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   Currently unused; commented out to avoid compiler warning */
#if 0
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}
#endif

/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
static Bool isZeroV128 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V128
                  && e->Iex.Const.con->Ico.V128 == 0x0000);
}

/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
static Bool isZeroV256 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V256
                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
}

/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
      case Ico_V256:  return toBool( e->Iex.Const.con->Ico.V256 == 0x00000000);
      default: vpanic("isZeroU");
   }
}

/* Is this an integer constant with all bits set (0b1...1) ? */
static Bool isOnesU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
                                     == 0xFFFFFFFF);
      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
                                     == 0xFFFFFFFFFFFFFFFFULL);
      default: ppIRExpr(e); vpanic("isOnesU");
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_And64:
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128:
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      case Iop_XorV256:
      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}

/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
   UInt i;
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}

static UInt fold_Clz32 ( UInt value )
{
   UInt i;
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}
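
/* For example, fold_Clz32(0x00010000) == 15: the most significant
   set bit is bit 16, which is preceded by 15 zero bits. */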

/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//{
//   ULong r = 0;
//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
//   return r;
//}

/* Helper for arbitrary expression pattern matching in flat IR.  If
   'e' is a reference to a tmp, look it up in env -- repeatedly, if
   necessary -- until it resolves to a non-tmp.  Note that this can
   return NULL if it can't resolve 'e' to a new expression, which will
   be the case if 'e' is instead defined by an IRStmt (IRDirty or
   LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
   /* Why is this loop guaranteed to terminate?  Because all tmps must
      have definitions before use, hence a tmp cannot be bound
      (directly or indirectly) to itself. */
   while (e->tag == Iex_RdTmp) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
      e = env[(Int)e->Iex.RdTmp.tmp];
      if (e == NULL) break;
   }
   return e;
}

/* Similar to |chase|, but follows at most one level of tmp reference. */
static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
{
   if (e == NULL || e->tag != Iex_RdTmp)
      return e;
   else
      return env[(Int)e->Iex.RdTmp.tmp];
}
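
/* For example, with env[t3] = RdTmp:t2 and env[t2] = GET:I32(16),
   chase(env, RdTmp:t3) follows both bindings and returns the
   GET:I32(16) expression, whereas chase1(env, RdTmp:t3) stops after
   one step and returns RdTmp:t2. */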
1370
1371static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
1372{
1373   Int     shift;
1374   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1375
1376   switch (e->tag) {
1377   case Iex_Unop:
1378      /* UNARY ops */
1379      if (e->Iex.Unop.arg->tag == Iex_Const) {
1380         switch (e->Iex.Unop.op) {
1381         case Iop_1Uto8:
1382            e2 = IRExpr_Const(IRConst_U8(toUChar(
1383                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1384                    ? 1 : 0)));
1385            break;
1386         case Iop_1Uto32:
1387            e2 = IRExpr_Const(IRConst_U32(
1388                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1389                    ? 1 : 0));
1390            break;
1391         case Iop_1Uto64:
1392            e2 = IRExpr_Const(IRConst_U64(
1393                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1394                    ? 1 : 0));
1395            break;
1396
1397         case Iop_1Sto8:
1398            e2 = IRExpr_Const(IRConst_U8(toUChar(
1399                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1400                    ? 0xFF : 0)));
1401            break;
1402         case Iop_1Sto16:
1403            e2 = IRExpr_Const(IRConst_U16(toUShort(
1404                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1405                    ? 0xFFFF : 0)));
1406            break;
1407         case Iop_1Sto32:
1408            e2 = IRExpr_Const(IRConst_U32(
1409                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1410                    ? 0xFFFFFFFF : 0));
1411            break;
1412         case Iop_1Sto64:
1413            e2 = IRExpr_Const(IRConst_U64(
1414                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1415                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
1416            break;
1417
1418         case Iop_8Sto32: {
1419            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1420            u32 <<= 24;
1421            u32 = (Int)u32 >> 24;   /* signed shift */
1422            e2 = IRExpr_Const(IRConst_U32(u32));
1423            break;
1424         }
1425         case Iop_16Sto32: {
1426            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1427            u32 <<= 16;
            u32 = (Int)u32 >> 16;   /* signed shift */
            e2 = IRExpr_Const(IRConst_U32(u32));
            break;
         }
         case Iop_8Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_8Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_8Sto16: {
            UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            u16 <<= 8;
            u16 = (Short)u16 >> 8;  /* signed shift */
            e2 = IRExpr_Const(IRConst_U16(u16));
            break;
         }
         case Iop_8Uto16:
            e2 = IRExpr_Const(IRConst_U16(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_32to16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_64to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                 )));
            break;

         case Iop_NotV128:
            e2 = IRExpr_Const(IRConst_V128(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
            break;
         case Iop_Not64:
            e2 = IRExpr_Const(IRConst_U64(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
            break;
         case Iop_Not32:
            e2 = IRExpr_Const(IRConst_U32(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_Not16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
            break;
         case Iop_Not8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
            break;

         case Iop_Not1:
            e2 = IRExpr_Const(IRConst_U1(
                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
            break;

         case Iop_64to8: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFULL;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
            break;
         }
         case Iop_64to16: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFFFULL;
            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
            break;
         }
         case Iop_64to32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0x00000000FFFFFFFFULL;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_64HIto32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 >>= 32;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_32Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFFFFFULL
                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
            break;
         case Iop_16Sto64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            u64 <<= 48;
            u64 = (Long)u64 >> 48;   /* signed shift */
            e2 = IRExpr_Const(IRConst_U64(u64));
            break;
         }
         case Iop_32Sto64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            u64 <<= 32;
            u64 = (Long)u64 >> 32;   /* signed shift */
            e2 = IRExpr_Const(IRConst_U64(u64));
            break;
         }

         case Iop_16to8: {
            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            w16 &= 0xFF;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
            break;
         }
         case Iop_16HIto8: {
            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            w16 >>= 8;
            w16 &= 0xFF;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
            break;
         }

         case Iop_CmpNEZ8:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                 )));
            break;
         case Iop_CmpNEZ32:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_CmpNEZ64:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                 )));
            break;

         case Iop_CmpwNEZ32: {
            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (w32 == 0)
               e2 = IRExpr_Const(IRConst_U32( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
            break;
         }
         case Iop_CmpwNEZ64: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (w64 == 0)
               e2 = IRExpr_Const(IRConst_U64( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
            break;
         }

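         /* The Left ops compute arg | -arg: every bit at or above the
            lowest set bit of arg becomes 1.  Worked example:
            Left32(0x18) = 0x18 | 0xFFFFFFE8 = 0xFFFFFFF8. */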
         case Iop_Left32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
            s32 = (s32 | (-s32));
            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
            break;
         }

         case Iop_Left64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            Long  s64 = (Long)u64;
            s64 = (s64 | (-s64));
            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
            break;
         }

         case Iop_Clz32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (u32 != 0)
               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
            break;
         }
         case Iop_Clz64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (u64 != 0ULL)
               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
            break;
         }
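         /* Note: a zero argument is deliberately left unfolded above,
            since the IR defines Clz32/Clz64 to have undefined results
            for a zero argument. */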

         /* For these vector ones, can't fold all cases, but at least
            do the most obvious one.  Could do better here using
            summarise/desummarise of vector constants, but too
            difficult to verify; hence just handle the zero cases. */
         case Iop_32UtoV128: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (0 == u32) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));
            } else {
               goto unhandled;
            }
            break;
         }
         case Iop_V128to64: {
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (0 == ((v128 >> 0) & 0xFF)) {
               e2 = IRExpr_Const(IRConst_U64(0));
            } else {
               goto unhandled;
            }
            break;
         }
         case Iop_V128HIto64: {
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (0 == ((v128 >> 8) & 0xFF)) {
               e2 = IRExpr_Const(IRConst_U64(0));
            } else {
               goto unhandled;
            }
            break;
         }
         case Iop_64UtoV128: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (0 == u64) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));
            } else {
               goto unhandled;
            }
            break;
         }

         /* Even stupider (although still correct ..) */
         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
            if (v256 == 0x00000000) {
               e2 = IRExpr_Const(IRConst_U64(0));
            } else {
               goto unhandled;
            }
            break;
         }

         case Iop_ZeroHI64ofV128: {
            /* Could do better here -- only need to look at the bottom 64 bits
               of the argument, really. */
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (v128 == 0x0000) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));
            } else {
               goto unhandled;
            }
            break;
         }

         default:
            goto unhandled;
      }
      }
      break;

   case Iex_Binop:
      /* BINARY ops */
      if (e->Iex.Binop.arg1->tag == Iex_Const
          && e->Iex.Binop.arg2->tag == Iex_Const) {
         /* cases where both args are consts */
         switch (e->Iex.Binop.op) {

            /* -- Or -- */
            case Iop_Or8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Or16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Or32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Or64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;
            case Iop_OrV128:
               e2 = IRExpr_Const(IRConst_V128(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
               break;

            /* -- Xor -- */
            case Iop_Xor8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Xor16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Xor32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Xor64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;
            case Iop_XorV128:
               e2 = IRExpr_Const(IRConst_V128(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
               break;

            /* -- And -- */
            case Iop_And8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_And16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_And32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_And64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;
            case Iop_AndV128:
               e2 = IRExpr_Const(IRConst_V128(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
               break;

            /* -- Add -- */
            case Iop_Add8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Add32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Add64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Sub -- */
            case Iop_Sub8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Sub32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Sub64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Max32U -- */
            case Iop_Max32U: {
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               UInt res  = u32a > u32b ? u32a : u32b;
               e2 = IRExpr_Const(IRConst_U32(res));
               break;
            }

            /* -- Mul -- */
            case Iop_Mul32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Mul64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            case Iop_MullS32: {
               /* very paranoid */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Long  s64a = (Long)s32a;
               Long  s64b = (Long)s32b;
               Long  sres = s64a * s64b;
               ULong ures = (ULong)sres;
               e2 = IRExpr_Const(IRConst_U64(ures));
               break;
            }

            /* -- Shl -- */
            case Iop_Shl32:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31)
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           << shift)));
               break;
            case Iop_Shl64:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63)
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           << shift)));
               break;

            /* -- Sar -- */
            case Iop_Sar32: {
               /* paranoid ... */
               /*signed*/ Int s32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  s32 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
               }
               break;
            }
            case Iop_Sar64: {
               /* paranoid ... */
               /*signed*/ Long s64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  s64 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
               }
               break;
            }

            /* -- Shr -- */
            case Iop_Shr32: {
               /* paranoid ... */
               /*unsigned*/ UInt u32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  u32 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U32(u32));
               }
               break;
            }
            case Iop_Shr64: {
               /* paranoid ... */
               /*unsigned*/ ULong u64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  u64 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U64(u64));
               }
               break;
            }

            /* -- CmpEQ -- */
            case Iop_CmpEQ32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpEQ64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpNE -- */
            case Iop_CmpNE8:
            case Iop_CasCmpNE8:
            case Iop_ExpCmpNE8:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
               break;
            case Iop_CmpNE32:
            case Iop_CasCmpNE32:
            case Iop_ExpCmpNE32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpNE64:
            case Iop_CasCmpNE64:
            case Iop_ExpCmpNE64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpLEU -- */
            case Iop_CmpLE32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLE64U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLES -- */
            case Iop_CmpLE32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLE64S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLTS -- */
            case Iop_CmpLT32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLT64S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLTU -- */
            case Iop_CmpLT32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLT64U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpORD -- */
            case Iop_CmpORD32S: {
               /* very paranoid */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Int   r = 0x2; /* EQ */
               if (s32a < s32b) {
                  r = 0x8; /* LT */
               }
               else if (s32a > s32b) {
                  r = 0x4; /* GT */
               }
               e2 = IRExpr_Const(IRConst_U32(r));
               break;
            }

            /* -- nHLto2n -- */
            case Iop_32HLto64:
               e2 = IRExpr_Const(IRConst_U64(
                       (((ULong)(e->Iex.Binop.arg1
                                  ->Iex.Const.con->Ico.U32)) << 32)
                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
                    ));
               break;
            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
               break;
            /* For this vector one, can't fold all cases, but at
               least do the most obvious one.  Could do better here
               using summarise/desummarise of vector constants, but
               too difficult to verify; hence just handle the zero
               cases. */
            case Iop_64HLtoV128: {
               ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
               ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
               if (0 == argHi && 0 == argLo) {
                  e2 = IRExpr_Const(IRConst_V128(0));
               } else {
                  goto unhandled;
               }
               break;
            }
            /* Same reasoning for the 256-bit version. */
            case Iop_V128HLtoV256: {
               IRExpr* argHi = e->Iex.Binop.arg1;
               IRExpr* argLo = e->Iex.Binop.arg2;
               if (isZeroV128(argHi) && isZeroV128(argLo)) {
                  e2 = IRExpr_Const(IRConst_V256(0));
               } else {
                  goto unhandled;
               }
               break;
            }

            /* -- V128 stuff -- */
            case Iop_InterleaveLO8x16: {
               /* This turns up a lot in Memcheck instrumentation of
                  Icc generated code.  I don't know why. */
               UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
               UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
               if (0 == arg1 && 0 == arg2) {
                  e2 = IRExpr_Const(IRConst_V128(0));
               } else {
                  goto unhandled;
               }
               break;
            }

            default:
               goto unhandled;
         }

      } else {

         /* other cases (identities, etc) */
         switch (e->Iex.Binop.op) {

            case Iop_Shl32:
            case Iop_Shl64:
            case Iop_Shr64:
            case Iop_Sar64:
               /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_Sar32:
            case Iop_Shr32:
               /* Shr32/Sar32(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_Or8:
            case Iop_Or16:
            case Iop_Or32:
            case Iop_Or64:
            case Iop_Max32U:
               /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
               /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
               if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_Add8:
               /* Add8(t,t) ==> t << 1.
                  Memcheck doesn't understand that
                  x+x produces a defined least significant bit, and it seems
                  simplest just to get rid of the problem by rewriting it
                  out, since the opportunity to do so exists. */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
                                    IRExpr_Const(IRConst_U8(1)));
                  break;
               }
               break;

               /* NB no Add16(t,t) case yet as no known test case exists */

            case Iop_Add32:
            case Iop_Add64:
               /* Add32/Add64(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Add32/Add64(0,x) ==> x */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = IRExpr_Binop(
                          e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
                          e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
                  break;
               }
               break;

            case Iop_Sub32:
            case Iop_Sub64:
               /* Sub32/Sub64(x,0) ==> x */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               break;
            case Iop_Sub8x16:
               /* Sub8x16(x,0) ==> x */
               if (isZeroV128(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_And8:
            case Iop_And16:
            case Iop_And32:
            case Iop_And64:
               /* And8/And16/And32/And64(x,1---1b) ==> x */
               if (isOnesU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* And8/And16/And32/And64(1---1b,x) ==> x */
               if (isOnesU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* And8/And16/And32/And64(x,0) ==> 0 */
               if (isZeroU(e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg2;
                  break;
               }
               /* And8/And16/And32/And64(0,x) ==> 0 */
               if (isZeroU(e->Iex.Binop.arg1)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               break;

            case Iop_AndV128:
            case Iop_AndV256:
               /* AndV128/AndV256(t,t) ==> t, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* Deal with either arg zero.  Could handle other And
                  cases here too. */
               if (e->Iex.Binop.op == Iop_AndV256
                   && (isZeroV256(e->Iex.Binop.arg1)
                       || isZeroV256(e->Iex.Binop.arg2))) {
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
                  break;
               } else if (e->Iex.Binop.op == Iop_AndV128
                          && (isZeroV128(e->Iex.Binop.arg1)
                              || isZeroV128(e->Iex.Binop.arg2))) {
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               break;

            case Iop_OrV128:
            case Iop_OrV256:
               /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = e->Iex.Binop.arg1;
                  break;
               }
               /* OrV128(t,0) ==> t */
               if (e->Iex.Binop.op == Iop_OrV128) {
                  if (isZeroV128(e->Iex.Binop.arg2)) {
                     e2 = e->Iex.Binop.arg1;
                     break;
                  }
                  if (isZeroV128(e->Iex.Binop.arg1)) {
                     e2 = e->Iex.Binop.arg2;
                     break;
                  }
               }
               /* OrV256(t,0) ==> t */
               if (e->Iex.Binop.op == Iop_OrV256) {
                  if (isZeroV256(e->Iex.Binop.arg2)) {
                     e2 = e->Iex.Binop.arg1;
                     break;
                  }
                  //Disabled because there's no known test case right now.
                  //if (isZeroV256(e->Iex.Binop.arg1)) {
                  //   e2 = e->Iex.Binop.arg2;
                  //   break;
                  //}
               }
               break;

            case Iop_Xor8:
            case Iop_Xor16:
            case Iop_Xor32:
            case Iop_Xor64:
            case Iop_XorV128:
            case Iop_XorV256:
               /* Xor8/16/32/64/V128/V256(t,t) ==> 0, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               /* XorV128(t,0) ==> t */
               if (e->Iex.Binop.op == Iop_XorV128) {
                  if (isZeroV128(e->Iex.Binop.arg2)) {
                     e2 = e->Iex.Binop.arg1;
                     break;
                  }
                  //Disabled because there's no known test case right now.
                  //if (isZeroV128(e->Iex.Binop.arg1)) {
                  //   e2 = e->Iex.Binop.arg2;
                  //   break;
                  //}
               } else {
                  /* Xor8/16/32/64(0,t) ==> t */
                  if (isZeroU(e->Iex.Binop.arg1)) {
                     e2 = e->Iex.Binop.arg2;
                     break;
                  }
                  /* Xor8/16/32/64(t,0) ==> t */
                  if (isZeroU(e->Iex.Binop.arg2)) {
                     e2 = e->Iex.Binop.arg1;
                     break;
                  }
               }
               break;

            case Iop_CmpNE32:
               /* CmpNE32(t,t) ==> 0, for some IRTemp t */
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               /* CmpNE32(1Uto32(b), 0) ==> b */
               if (isZeroU32(e->Iex.Binop.arg2)) {
                  IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
                  if (a1 && a1->tag == Iex_Unop
                         && a1->Iex.Unop.op == Iop_1Uto32) {
                     e2 = a1->Iex.Unop.arg;
                     break;
                  }
               }
               break;

            case Iop_CmpEQ32:
            case Iop_CmpEQ64:
            case Iop_CmpEQ8x8:
            case Iop_CmpEQ8x16:
            case Iop_CmpEQ16x8:
            case Iop_CmpEQ32x4:
               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
                  break;
               }
               break;

            default:
               break;
         }
      }
      break;

   case Iex_ITE:
      /* ITE */
      /* is the discriminant a constant? */
      if (e->Iex.ITE.cond->tag == Iex_Const) {
         /* assured by the IR type rules */
         vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
         e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
                 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
      }
      else
      /* are the arms identical? (pretty weedy test) */
      if (sameIRExprs(env, e->Iex.ITE.iftrue,
                           e->Iex.ITE.iffalse)) {
         e2 = e->Iex.ITE.iffalse;
      }
      break;

   default:
      /* not considered */
      break;
   }

   /* Show cases where we've found but not folded 'op(t,t)'.  Be
      careful not to call sameIRExprs with values of different types,
      though, else it will assert (and so it should!).  We can't
      conveniently call typeOfIRExpr on the two args without a whole
      bunch of extra plumbing to pass in a type env, so just use a
      hacky test to check the arguments are not anything that might
      cause sameIRExprs to assert.  This is only OK because this
      kludge is only used for debug printing, not for "real"
      operation.  For "real" operation (ie, all other calls to
      sameIRExprs), it is essential that the two args have the same
      type.

      The "right" solution is to plumb the containing block's
      IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
      that's a bunch of extra parameter passing which will just slow
      down the normal case, for no purpose. */
   if (vex_control.iropt_verbosity > 0
       && e == e2
       && e->tag == Iex_Binop
       && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
                                                    e->Iex.Binop.arg2)
       && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
      vex_printf("vex iropt: fold_Expr: no ident rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }

   /* Show the overall results of folding. */
   if (DEBUG_IROPT && e2 != e) {
      vex_printf("FOLD: ");
      ppIRExpr(e); vex_printf("  ->  ");
      ppIRExpr(e2); vex_printf("\n");
   }

   return e2;

 unhandled:
#  if 0
   vex_printf("\n\n");
   ppIRExpr(e);
   vpanic("fold_Expr: no rule for the above");
#  else
   if (vex_control.iropt_verbosity > 0) {
      vex_printf("vex iropt: fold_Expr: no const rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }
   return e2;
#  endif
}
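
/* Worked example for fold_Expr (illustrative only; the temp numbers
   are hypothetical).  Given the flattened binding
      t7 = Add32(0x3:I32, 0x4:I32)
   the const-const Add32 rule above rewrites the RHS to 0x7:I32, and
   a later
      t8 = And32(t7, t7)
   folds to just t7 via the And(t,t) ==> t identity, with sameIRExprs
   looking through the bindings recorded in 'env'. */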


/* Apply the subst to a simple 1-level expression -- guaranteed to be
   1-level due to previous flattening pass. */

static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
   switch (ex->tag) {
      case Iex_RdTmp:
         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
            IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
            if (rhs->tag == Iex_RdTmp)
               return rhs;
            if (rhs->tag == Iex_Const
                && rhs->Iex.Const.con->tag != Ico_F64i)
               return rhs;
         }
         /* not bound in env */
         return ex;

      case Iex_Const:
      case Iex_Get:
         return ex;

      case Iex_GetI:
         vassert(isIRAtom(ex->Iex.GetI.ix));
         return IRExpr_GetI(
            ex->Iex.GetI.descr,
            subst_Expr(env, ex->Iex.GetI.ix),
            ex->Iex.GetI.bias
         );

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         vassert(isIRAtom(qop->arg1));
         vassert(isIRAtom(qop->arg2));
         vassert(isIRAtom(qop->arg3));
         vassert(isIRAtom(qop->arg4));
         return IRExpr_Qop(
                   qop->op,
                   subst_Expr(env, qop->arg1),
                   subst_Expr(env, qop->arg2),
                   subst_Expr(env, qop->arg3),
                   subst_Expr(env, qop->arg4)
                );
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         vassert(isIRAtom(triop->arg1));
         vassert(isIRAtom(triop->arg2));
         vassert(isIRAtom(triop->arg3));
         return IRExpr_Triop(
                   triop->op,
                   subst_Expr(env, triop->arg1),
                   subst_Expr(env, triop->arg2),
                   subst_Expr(env, triop->arg3)
                );
      }

      case Iex_Binop:
         vassert(isIRAtom(ex->Iex.Binop.arg1));
         vassert(isIRAtom(ex->Iex.Binop.arg2));
         return IRExpr_Binop(
                   ex->Iex.Binop.op,
                   subst_Expr(env, ex->Iex.Binop.arg1),
                   subst_Expr(env, ex->Iex.Binop.arg2)
                );

      case Iex_Unop:
         vassert(isIRAtom(ex->Iex.Unop.arg));
         return IRExpr_Unop(
                   ex->Iex.Unop.op,
                   subst_Expr(env, ex->Iex.Unop.arg)
                );

      case Iex_Load:
         vassert(isIRAtom(ex->Iex.Load.addr));
         return IRExpr_Load(
                   ex->Iex.Load.end,
                   ex->Iex.Load.ty,
                   subst_Expr(env, ex->Iex.Load.addr)
                );

      case Iex_CCall: {
         Int      i;
         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; args2[i]; i++) {
            vassert(isIRAtom(args2[i]));
            args2[i] = subst_Expr(env, args2[i]);
         }
         return IRExpr_CCall(
                   ex->Iex.CCall.cee,
                   ex->Iex.CCall.retty,
                   args2
                );
      }

      case Iex_ITE:
         vassert(isIRAtom(ex->Iex.ITE.cond));
         vassert(isIRAtom(ex->Iex.ITE.iftrue));
         vassert(isIRAtom(ex->Iex.ITE.iffalse));
         return IRExpr_ITE(
                   subst_Expr(env, ex->Iex.ITE.cond),
                   subst_Expr(env, ex->Iex.ITE.iftrue),
                   subst_Expr(env, ex->Iex.ITE.iffalse)
                );

      default:
         vex_printf("\n\n"); ppIRExpr(ex);
         vpanic("subst_Expr");

   }
}
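
/* Example (illustrative): if env maps t3 to the constant 0x10:I32,
   then subst_Expr(env, Add32(t3,t4)) yields Add32(0x10:I32,t4).
   RdTmp leaves are replaced only when bound to another temp or to a
   non-F64i constant; all other expressions are rebuilt with
   substituted children. */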


/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.
*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

      default:
         vex_printf("\n"); ppIRStmt(st);
         vpanic("subst_and_fold_Stmt");
   }
}


IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs.  The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  The subst_and_fold_Stmt
         call also folds any constant expressions that result. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp, add it to the
            running environment.  This is for the benefit of copy
            propagation and to allow sameIRExprs to look through
            IRTemps. */
         case Ist_WrTmp: {
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

         default:
            break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
2919
2920
2921/*---------------------------------------------------------------*/
2922/*--- Dead code (t = E) removal                               ---*/
2923/*---------------------------------------------------------------*/
2924
2925/* As a side effect, also removes all code following an unconditional
2926   side exit. */
2927
/* The 'set' below is a map from IRTemp to nothing -- really just
   representing a set of IRTemps, as a Bool array indexed by IRTemp.
*/
2931
2932inline
2933static void addUses_Temp ( Bool* set, IRTemp tmp )
2934{
2935   set[(Int)tmp] = True;
2936}
2937
2938static void addUses_Expr ( Bool* set, IRExpr* e )
2939{
2940   Int i;
2941   switch (e->tag) {
2942      case Iex_GetI:
2943         addUses_Expr(set, e->Iex.GetI.ix);
2944         return;
2945      case Iex_ITE:
2946         addUses_Expr(set, e->Iex.ITE.cond);
2947         addUses_Expr(set, e->Iex.ITE.iftrue);
2948         addUses_Expr(set, e->Iex.ITE.iffalse);
2949         return;
2950      case Iex_CCall:
2951         for (i = 0; e->Iex.CCall.args[i]; i++)
2952            addUses_Expr(set, e->Iex.CCall.args[i]);
2953         return;
2954      case Iex_Load:
2955         addUses_Expr(set, e->Iex.Load.addr);
2956         return;
2957      case Iex_Qop:
2958         addUses_Expr(set, e->Iex.Qop.details->arg1);
2959         addUses_Expr(set, e->Iex.Qop.details->arg2);
2960         addUses_Expr(set, e->Iex.Qop.details->arg3);
2961         addUses_Expr(set, e->Iex.Qop.details->arg4);
2962         return;
2963      case Iex_Triop:
2964         addUses_Expr(set, e->Iex.Triop.details->arg1);
2965         addUses_Expr(set, e->Iex.Triop.details->arg2);
2966         addUses_Expr(set, e->Iex.Triop.details->arg3);
2967         return;
2968      case Iex_Binop:
2969         addUses_Expr(set, e->Iex.Binop.arg1);
2970         addUses_Expr(set, e->Iex.Binop.arg2);
2971         return;
2972      case Iex_Unop:
2973         addUses_Expr(set, e->Iex.Unop.arg);
2974         return;
2975      case Iex_RdTmp:
2976         addUses_Temp(set, e->Iex.RdTmp.tmp);
2977         return;
2978      case Iex_Const:
2979      case Iex_Get:
2980         return;
2981      default:
2982         vex_printf("\n");
2983         ppIRExpr(e);
2984         vpanic("addUses_Expr");
2985   }
2986}
2987
2988static void addUses_Stmt ( Bool* set, IRStmt* st )
2989{
2990   Int      i;
2991   IRDirty* d;
2992   IRCAS*   cas;
2993   switch (st->tag) {
2994      case Ist_AbiHint:
2995         addUses_Expr(set, st->Ist.AbiHint.base);
2996         addUses_Expr(set, st->Ist.AbiHint.nia);
2997         return;
2998      case Ist_PutI:
2999         addUses_Expr(set, st->Ist.PutI.details->ix);
3000         addUses_Expr(set, st->Ist.PutI.details->data);
3001         return;
3002      case Ist_WrTmp:
3003         addUses_Expr(set, st->Ist.WrTmp.data);
3004         return;
3005      case Ist_Put:
3006         addUses_Expr(set, st->Ist.Put.data);
3007         return;
3008      case Ist_Store:
3009         addUses_Expr(set, st->Ist.Store.addr);
3010         addUses_Expr(set, st->Ist.Store.data);
3011         return;
3012      case Ist_StoreG: {
3013         IRStoreG* sg = st->Ist.StoreG.details;
3014         addUses_Expr(set, sg->addr);
3015         addUses_Expr(set, sg->data);
3016         addUses_Expr(set, sg->guard);
3017         return;
3018      }
3019      case Ist_LoadG: {
3020         IRLoadG* lg = st->Ist.LoadG.details;
3021         addUses_Expr(set, lg->addr);
3022         addUses_Expr(set, lg->alt);
3023         addUses_Expr(set, lg->guard);
3024         return;
3025      }
3026      case Ist_CAS:
3027         cas = st->Ist.CAS.details;
3028         addUses_Expr(set, cas->addr);
3029         if (cas->expdHi)
3030            addUses_Expr(set, cas->expdHi);
3031         addUses_Expr(set, cas->expdLo);
3032         if (cas->dataHi)
3033            addUses_Expr(set, cas->dataHi);
3034         addUses_Expr(set, cas->dataLo);
3035         return;
3036      case Ist_LLSC:
3037         addUses_Expr(set, st->Ist.LLSC.addr);
3038         if (st->Ist.LLSC.storedata)
3039            addUses_Expr(set, st->Ist.LLSC.storedata);
3040         return;
3041      case Ist_Dirty:
3042         d = st->Ist.Dirty.details;
3043         if (d->mFx != Ifx_None)
3044            addUses_Expr(set, d->mAddr);
3045         addUses_Expr(set, d->guard);
3046         for (i = 0; d->args[i] != NULL; i++) {
3047            IRExpr* arg = d->args[i];
3048            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
3049               addUses_Expr(set, arg);
3050         }
3051         return;
3052      case Ist_NoOp:
3053      case Ist_IMark:
3054      case Ist_MBE:
3055         return;
3056      case Ist_Exit:
3057         addUses_Expr(set, st->Ist.Exit.guard);
3058         return;
3059      default:
3060         vex_printf("\n");
3061         ppIRStmt(st);
3062         vpanic("addUses_Stmt");
3063   }
3064}
3065
3066
3067/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
3068static Bool isZeroU1 ( IRExpr* e )
3069{
3070   return toBool( e->tag == Iex_Const
3071                  && e->Iex.Const.con->tag == Ico_U1
3072                  && e->Iex.Const.con->Ico.U1 == False );
3073}
3074
3075/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
3076static Bool isOneU1 ( IRExpr* e )
3077{
3078   return toBool( e->tag == Iex_Const
3079                  && e->Iex.Const.con->tag == Ico_U1
3080                  && e->Iex.Const.con->Ico.U1 == True );
3081}
3082
3083
3084/* Note, this destructively modifies the given IRSB. */
3085
3086/* Scan backwards through statements, carrying a set of IRTemps which
3087   are known to be used after the current point.  On encountering 't =
3088   E', delete the binding if it is not used.  Otherwise, add any temp
3089   uses to the set and keep on moving backwards.
3090
3091   As an enhancement, the first (backwards) pass searches for IR exits
3092   with always-taken conditions and notes the location of the earliest
3093   one in the block.  If any such are found, a second pass copies the
3094   exit destination and jump kind to the bb-end.  Then, the exit and
3095   all statements following it are turned into no-ops.
3096*/
3097
3098/* notstatic */ void do_deadcode_BB ( IRSB* bb )
3099{
3100   Int     i, i_unconditional_exit;
3101   Int     n_tmps = bb->tyenv->types_used;
3102   Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
3103   IRStmt* st;
3104
3105   for (i = 0; i < n_tmps; i++)
3106      set[i] = False;
3107
3108   /* start off by recording IRTemp uses in the next field. */
3109   addUses_Expr(set, bb->next);
3110
3111   /* First pass */
3112
3113   /* Work backwards through the stmts */
3114   i_unconditional_exit = -1;
3115   for (i = bb->stmts_used-1; i >= 0; i--) {
3116      st = bb->stmts[i];
3117      if (st->tag == Ist_NoOp)
3118         continue;
3119      /* take note of any unconditional exits */
3120      if (st->tag == Ist_Exit
3121          && isOneU1(st->Ist.Exit.guard))
3122         i_unconditional_exit = i;
3123      if (st->tag == Ist_WrTmp
3124          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
         /* It's an IRTemp which never got used.  Delete it. */
3126         if (DEBUG_IROPT) {
3127            vex_printf("DEAD: ");
3128            ppIRStmt(st);
3129            vex_printf("\n");
3130         }
3131         bb->stmts[i] = IRStmt_NoOp();
3132      }
3133      else
3134      if (st->tag == Ist_Dirty
3135          && st->Ist.Dirty.details->guard
3136          && isZeroU1(st->Ist.Dirty.details->guard)) {
3137         /* This is a dirty helper which will never get called.
3138            Delete it. */
3139         bb->stmts[i] = IRStmt_NoOp();
3140       }
3141       else {
3142         /* Note any IRTemp uses made by the current statement. */
3143         addUses_Stmt(set, st);
3144      }
3145   }
3146
   /* Optional second pass: if any always-taken exits were found,
      copy the earliest one to the block end, then delete it and all
      following statements. */
3149
3150   if (i_unconditional_exit != -1) {
3151      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
3152                        i_unconditional_exit);
3153      vassert(i_unconditional_exit >= 0
3154              && i_unconditional_exit < bb->stmts_used);
3155      bb->next
3156         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
3157      bb->jumpkind
3158         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
3159      bb->offsIP
3160         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
3161      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
3162         bb->stmts[i] = IRStmt_NoOp();
3163   }
3164}
3165
3166
3167/*---------------------------------------------------------------*/
3168/*--- Specialisation of helper function calls, in             ---*/
3169/*--- collaboration with the front end                        ---*/
3170/*---------------------------------------------------------------*/
3171
3172static
3173IRSB* spec_helpers_BB(
3174         IRSB* bb,
3175         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
3176      )
3177{
3178   Int     i;
3179   IRStmt* st;
3180   IRExpr* ex;
3181   Bool    any = False;
3182
3183   for (i = bb->stmts_used-1; i >= 0; i--) {
3184      st = bb->stmts[i];
3185
3186      if (st->tag != Ist_WrTmp
3187          || st->Ist.WrTmp.data->tag != Iex_CCall)
3188         continue;
3189
3190      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
3191                          st->Ist.WrTmp.data->Iex.CCall.args,
3192                          &bb->stmts[0], i );
      if (!ex)
         /* the front end can't think of a suitable replacement */
         continue;
3196
3197      /* We got something better.  Install it in the bb. */
3198      any = True;
3199      bb->stmts[i]
3200         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
3201
3202      if (0) {
3203         vex_printf("SPEC: ");
3204         ppIRExpr(st->Ist.WrTmp.data);
3205         vex_printf("  -->  ");
3206         ppIRExpr(ex);
3207         vex_printf("\n");
3208      }
3209   }
3210
3211   if (any)
3212      bb = flatten_BB(bb);
3213   return bb;
3214}
3215
3216
3217/*---------------------------------------------------------------*/
3218/*--- Determination of guest state aliasing relationships     ---*/
3219/*---------------------------------------------------------------*/
3220
3221/* These are helper functions for CSE and GetI/PutI transformations.
3222
3223   Determine, to the extent possible, the relationship between two
3224   guest state accesses.  The possible outcomes are:
3225
3226   * Exact alias.  These two accesses denote precisely the same
3227     piece of the guest state.
3228
   * Definitely no alias.  These two accesses are guaranteed not to
     overlap each other in any part of the guest state.
3231
3232   * Unknown -- if neither of the above can be established.
3233
3234   If in doubt, return Unknown.  */
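/* For instance (an illustrative sketch): an indexed access whose
   array covers guest offsets 72 .. 135 and a non-indexed 4-byte
   access at offset 16 occupy disjoint ranges, giving NoAlias.  Were
   the non-indexed access at offset 80 instead, the ranges would
   overlap and, lacking further information, the answer would be
   UnknownAlias. */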
3235
3236typedef
3237   enum { ExactAlias, NoAlias, UnknownAlias }
3238   GSAliasing;
3239
3240
3241/* Produces the alias relation between an indexed guest
3242   state access and a non-indexed access. */
3243
3244static
3245GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3246                                    Int offset2, IRType ty2 )
3247{
3248   UInt minoff1, maxoff1, minoff2, maxoff2;
3249
3250   getArrayBounds( descr1, &minoff1, &maxoff1 );
3251   minoff2 = offset2;
3252   maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3253
3254   if (maxoff1 < minoff2 || maxoff2 < minoff1)
3255      return NoAlias;
3256
3257   /* Could probably do better here if required.  For the moment
3258      however just claim not to know anything more. */
3259   return UnknownAlias;
3260}
3261
3262
3263/* Produces the alias relation between two indexed guest state
3264   accesses. */
3265
3266static
3267GSAliasing getAliasingRelation_II (
3268              IRRegArray* descr1, IRExpr* ix1, Int bias1,
3269              IRRegArray* descr2, IRExpr* ix2, Int bias2
3270           )
3271{
3272   UInt minoff1, maxoff1, minoff2, maxoff2;
3273   Int  iters;
3274
3275   /* First try hard to show they don't alias. */
3276   getArrayBounds( descr1, &minoff1, &maxoff1 );
3277   getArrayBounds( descr2, &minoff2, &maxoff2 );
3278   if (maxoff1 < minoff2 || maxoff2 < minoff1)
3279      return NoAlias;
3280
3281   /* So the two arrays at least partially overlap.  To get any
3282      further we'll have to be sure that the descriptors are
3283      identical. */
3284   if (!eqIRRegArray(descr1, descr2))
3285      return UnknownAlias;
3286
3287   /* The descriptors are identical.  Now the only difference can be
3288      in the index expressions.  If they cannot be shown to be
3289      identical, we have to say we don't know what the aliasing
3290      relation will be.  Now, since the IR is flattened, the index
3291      expressions should be atoms -- either consts or tmps.  So that
3292      makes the comparison simple. */
3293   vassert(isIRAtom(ix1));
3294   vassert(isIRAtom(ix2));
3295   if (!eqIRAtom(ix1,ix2))
3296      return UnknownAlias;
3297
3298   /* Ok, the index expressions are identical.  So now the only way
3299      they can be different is in the bias.  Normalise this
3300      paranoidly, to reliably establish equality/non-equality. */
3301
3302   /* So now we know that the GetI and PutI index the same array
3303      with the same base.  Are the offsets the same, modulo the
3304      array size?  Do this paranoidly. */
3305   vassert(descr1->nElems == descr2->nElems);
3306   vassert(descr1->elemTy == descr2->elemTy);
3307   vassert(descr1->base   == descr2->base);
3308   iters = 0;
3309   while (bias1 < 0 || bias2 < 0) {
3310      bias1 += descr1->nElems;
3311      bias2 += descr1->nElems;
3312      iters++;
3313      if (iters > 10)
3314         vpanic("getAliasingRelation: iters");
3315   }
3316   vassert(bias1 >= 0 && bias2 >= 0);
3317   bias1 %= descr1->nElems;
3318   bias2 %= descr1->nElems;
3319   vassert(bias1 >= 0 && bias1 < descr1->nElems);
3320   vassert(bias2 >= 0 && bias2 < descr1->nElems);
3321
   /* Finally, bias1 and bias2 are normalised into the range
      0 .. descr1->nElems - 1.  And so we can establish
      equality/non-equality. */
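   /* Worked example (illustrative): with nElems == 8, bias1 == -3
      normalises to 5 (-3 + 8 == 5) and bias2 == 13 also normalises
      to 5 (13 % 8 == 5), so the two accesses denote the same element
      and the result is ExactAlias. */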
3325
3326   return bias1==bias2 ? ExactAlias : NoAlias;
3327}
3328
3329
3330/*---------------------------------------------------------------*/
3331/*--- Common Subexpression Elimination                        ---*/
3332/*---------------------------------------------------------------*/
3333
3334/* Expensive in time and space. */
3335
3336/* Uses two environments:
   an IRTemp -> IRTemp mapping
3338   a mapping from AvailExpr* to IRTemp
3339*/
3340
3341typedef
3342   struct {
3343      enum { TCc, TCt } tag;
3344      union { IRTemp tmp; IRConst* con; } u;
3345   }
3346   TmpOrConst;
3347
3348static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3349{
3350   if (tc1->tag != tc2->tag)
3351      return False;
3352   switch (tc1->tag) {
3353      case TCc:
3354         return eqIRConst(tc1->u.con, tc2->u.con);
3355      case TCt:
3356         return tc1->u.tmp == tc2->u.tmp;
3357      default:
3358         vpanic("eqTmpOrConst");
3359   }
3360}
3361
3362static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3363{
3364   Bool eq = cee1->addr == cee2->addr;
3365   if (eq) {
3366      vassert(cee1->regparms == cee2->regparms);
3367      vassert(cee1->mcx_mask == cee2->mcx_mask);
3368      /* Names should be the same too, but we don't bother to
3369         check. */
3370   }
3371   return eq;
3372}
3373
3374/* Convert an atomic IRExpr* to a TmpOrConst. */
3375static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3376{
3377   switch (e->tag) {
3378      case Iex_RdTmp:
3379         tc->tag   = TCt;
3380         tc->u.tmp = e->Iex.RdTmp.tmp;
3381         break;
3382      case Iex_Const:
3383         tc->tag   = TCc;
3384         tc->u.con = e->Iex.Const.con;
3385         break;
3386      default:
3387         /* Getting here is a serious error.  It means that the
3388            presented arg isn't an IR atom, as it should be. */
3389         vpanic("irExpr_to_TmpOrConst");
3390   }
3391}
3392
3393/* Convert a TmpOrConst to an atomic IRExpr*. */
3394static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3395{
3396   switch (tc->tag) {
3397      case TCc: return IRExpr_Const(tc->u.con);
3398      case TCt: return IRExpr_RdTmp(tc->u.tmp);
3399      default:  vpanic("tmpOrConst_to_IRExpr");
3400   }
3401}
3402
3403/* Convert a NULL terminated IRExpr* vector to an array of
3404   TmpOrConsts, and a length. */
3405static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3406                                       /*OUT*/Int* nOuts,
3407                                       IRExpr** ins )
3408{
3409   Int i, n;
3410   /* We have to make two passes, one to count, one to copy. */
3411   for (n = 0; ins[n]; n++)
3412      ;
3413   *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3414   *nOuts = n;
3415   /* and now copy .. */
3416   for (i = 0; i < n; i++) {
3417      IRExpr*     arg = ins[i];
3418      TmpOrConst* dst = &(*outs)[i];
3419      irExpr_to_TmpOrConst(dst, arg);
3420   }
3421}
3422
3423typedef
3424   struct {
3425      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
3426             CCall, Load
3427      } tag;
3428      union {
3429         /* unop(tmp) */
3430         struct {
3431            IROp   op;
3432            IRTemp arg;
3433         } Ut;
3434         /* binop(tmp,tmp) */
3435         struct {
3436            IROp   op;
3437            IRTemp arg1;
3438            IRTemp arg2;
3439         } Btt;
3440         /* binop(tmp,const) */
3441         struct {
3442            IROp    op;
3443            IRTemp  arg1;
3444            IRConst con2;
3445         } Btc;
3446         /* binop(const,tmp) */
3447         struct {
3448            IROp    op;
3449            IRConst con1;
3450            IRTemp  arg2;
3451         } Bct;
3452         /* F64i-style const */
3453         struct {
3454            ULong f64i;
3455         } Cf64i;
3456         /* ITE(tmp,tmp,tmp) */
3457         struct {
3458            IRTemp co;
3459            IRTemp e1;
3460            IRTemp e0;
3461         } Ittt;
3462         /* ITE(tmp,tmp,const) */
3463         struct {
3464            IRTemp  co;
3465            IRTemp  e1;
3466            IRConst con0;
3467         } Ittc;
3468         /* ITE(tmp,const,tmp) */
3469         struct {
3470            IRTemp  co;
3471            IRConst con1;
3472            IRTemp  e0;
3473         } Itct;
3474         /* ITE(tmp,const,const) */
3475         struct {
3476            IRTemp  co;
3477            IRConst con1;
3478            IRConst con0;
3479         } Itcc;
         /* GetI(descr,tmp,bias) */
3481         struct {
3482            IRRegArray* descr;
3483            IRTemp      ix;
3484            Int         bias;
3485         } GetIt;
3486         /* Clean helper call */
3487         struct {
3488            IRCallee*   cee;
3489            TmpOrConst* args;
3490            Int         nArgs;
3491            IRType      retty;
3492         } CCall;
3493         /* Load(end,ty,addr) */
3494         struct {
3495            IREndness  end;
3496            IRType     ty;
3497            TmpOrConst addr;
3498         } Load;
3499      } u;
3500   }
3501   AvailExpr;
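/* For instance (an illustrative sketch): Add32(t1,t2) is
   representable as a Btt AvailExpr with op = Iop_Add32, whilst
   Add32(t1,0x4:I32) maps to the Btc form, which carries the constant
   by value in con2.  Expressions fitting none of these forms simply
   never become "available" and so are not CSE candidates. */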
3502
3503static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
3504{
3505   if (LIKELY(a1->tag != a2->tag))
3506      return False;
3507   switch (a1->tag) {
3508      case Ut:
3509         return toBool(
3510                a1->u.Ut.op == a2->u.Ut.op
3511                && a1->u.Ut.arg == a2->u.Ut.arg);
3512      case Btt:
3513         return toBool(
3514                a1->u.Btt.op == a2->u.Btt.op
3515                && a1->u.Btt.arg1 == a2->u.Btt.arg1
3516                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
3517      case Btc:
3518         return toBool(
3519                a1->u.Btc.op == a2->u.Btc.op
3520                && a1->u.Btc.arg1 == a2->u.Btc.arg1
3521                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
3522      case Bct:
3523         return toBool(
3524                a1->u.Bct.op == a2->u.Bct.op
3525                && a1->u.Bct.arg2 == a2->u.Bct.arg2
3526                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
3527      case Cf64i:
3528         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
3529      case Ittt:
3530         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
3531                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
3532                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
3533      case Ittc:
3534         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
3535                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
3536                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
3537      case Itct:
3538         return toBool(a1->u.Itct.co == a2->u.Itct.co
3539                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
3540                       && a1->u.Itct.e0 == a2->u.Itct.e0);
3541      case Itcc:
3542         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
3543                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
3544                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
3545      case GetIt:
3546         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
3547                       && a1->u.GetIt.ix == a2->u.GetIt.ix
3548                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
3549      case CCall: {
3550         Int  i, n;
3551         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
3552                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
3553         if (eq) {
3554            n = a1->u.CCall.nArgs;
3555            for (i = 0; i < n; i++) {
3556               if (!eqTmpOrConst( &a1->u.CCall.args[i],
3557                                  &a2->u.CCall.args[i] )) {
3558                  eq = False;
3559                  break;
3560               }
3561            }
3562         }
3563         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
3564         return eq;
3565      }
3566      case Load: {
3567         Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
3568                          && a1->u.Load.ty == a2->u.Load.ty
3569                          && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
3570         return eq;
3571      }
3572      default:
3573         vpanic("eq_AvailExpr");
3574   }
3575}
3576
3577static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
3578{
3579   IRConst *con, *con0, *con1;
3580   switch (ae->tag) {
3581      case Ut:
3582         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
3583      case Btt:
3584         return IRExpr_Binop( ae->u.Btt.op,
3585                              IRExpr_RdTmp(ae->u.Btt.arg1),
3586                              IRExpr_RdTmp(ae->u.Btt.arg2) );
3587      case Btc:
3588         con = LibVEX_Alloc_inline(sizeof(IRConst));
3589         *con = ae->u.Btc.con2;
3590         return IRExpr_Binop( ae->u.Btc.op,
3591                              IRExpr_RdTmp(ae->u.Btc.arg1),
3592                              IRExpr_Const(con) );
3593      case Bct:
3594         con = LibVEX_Alloc_inline(sizeof(IRConst));
3595         *con = ae->u.Bct.con1;
3596         return IRExpr_Binop( ae->u.Bct.op,
3597                              IRExpr_Const(con),
3598                              IRExpr_RdTmp(ae->u.Bct.arg2) );
3599      case Cf64i:
3600         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
3601      case Ittt:
3602         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
3603                           IRExpr_RdTmp(ae->u.Ittt.e1),
3604                           IRExpr_RdTmp(ae->u.Ittt.e0));
3605      case Ittc:
3606         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3607         *con0 = ae->u.Ittc.con0;
3608         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
3609                           IRExpr_RdTmp(ae->u.Ittc.e1),
3610                           IRExpr_Const(con0));
3611      case Itct:
3612         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3613         *con1 = ae->u.Itct.con1;
3614         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
3615                           IRExpr_Const(con1),
3616                           IRExpr_RdTmp(ae->u.Itct.e0));
3617
3618      case Itcc:
3619         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3620         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3621         *con0 = ae->u.Itcc.con0;
3622         *con1 = ae->u.Itcc.con1;
3623         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
3624                           IRExpr_Const(con1),
3625                           IRExpr_Const(con0));
3626      case GetIt:
3627         return IRExpr_GetI(ae->u.GetIt.descr,
3628                            IRExpr_RdTmp(ae->u.GetIt.ix),
3629                            ae->u.GetIt.bias);
3630      case CCall: {
3631         Int i, n = ae->u.CCall.nArgs;
3632         vassert(n >= 0);
3633         IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
3634         vec[n] = NULL;
3635         for (i = 0; i < n; i++) {
3636            vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
3637         }
3638         return IRExpr_CCall(ae->u.CCall.cee,
3639                             ae->u.CCall.retty,
3640                             vec);
3641      }
3642      case Load:
3643         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
3644                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
3645      default:
3646         vpanic("availExpr_to_IRExpr");
3647   }
3648}
3649
3650inline
3651static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3652{
3653   HWord res;
3654   /* env :: IRTemp -> IRTemp */
3655   if (lookupHHW( env, &res, (HWord)tmp ))
3656      return (IRTemp)res;
3657   else
3658      return tmp;
3659}
3660
3661inline
3662static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3663                                          HashHW* env )
3664{
3665   /* env :: IRTemp -> IRTemp */
3666   if (tc->tag == TCt) {
3667      tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3668   }
3669}
3670
3671static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
3672{
3673   /* env :: IRTemp -> IRTemp */
3674   switch (ae->tag) {
3675      case Ut:
3676         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
3677         break;
3678      case Btt:
3679         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
3680         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
3681         break;
3682      case Btc:
3683         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
3684         break;
3685      case Bct:
3686         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
3687         break;
3688      case Cf64i:
3689         break;
3690      case Ittt:
3691         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
3692         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
3693         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
3694         break;
3695      case Ittc:
3696         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
3697         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
3698         break;
3699      case Itct:
3700         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
3701         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
3702         break;
3703      case Itcc:
3704         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
3705         break;
3706      case GetIt:
3707         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
3708         break;
3709      case CCall: {
         Int i, n = ae->u.CCall.nArgs;
3711         for (i = 0; i < n; i++) {
3712            subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
3713         }
3714         break;
3715      }
3716      case Load:
3717         subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
3718         break;
3719      default:
3720         vpanic("subst_AvailExpr");
3721   }
3722}
3723
3724static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
3725{
3726   AvailExpr* ae;
3727
3728   switch (e->tag) {
3729      case Iex_Unop:
3730         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
3731            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3732            ae->tag      = Ut;
3733            ae->u.Ut.op  = e->Iex.Unop.op;
3734            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
3735            return ae;
3736         }
3737         break;
3738
3739      case Iex_Binop:
3740         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
3741            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3742               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3743               ae->tag        = Btt;
3744               ae->u.Btt.op   = e->Iex.Binop.op;
3745               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3746               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3747               return ae;
3748            }
3749            if (e->Iex.Binop.arg2->tag == Iex_Const) {
3750               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3751               ae->tag        = Btc;
3752               ae->u.Btc.op   = e->Iex.Binop.op;
3753               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3754               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
3755               return ae;
3756            }
3757         } else if (e->Iex.Binop.arg1->tag == Iex_Const
3758                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3759            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3760            ae->tag        = Bct;
3761            ae->u.Bct.op   = e->Iex.Binop.op;
3762            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3763            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
3764            return ae;
3765         }
3766         break;
3767
3768      case Iex_Const:
3769         if (e->Iex.Const.con->tag == Ico_F64i) {
3770            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3771            ae->tag          = Cf64i;
3772            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
3773            return ae;
3774         }
3775         break;
3776
3777      case Iex_ITE:
3778         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
3779            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
3780               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3781                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3782                  ae->tag       = Ittt;
3783                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3784                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3785                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3786                  return ae;
3787               }
3788               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3789                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3790                  ae->tag       = Itct;
3791                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3792                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3793                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3794                  return ae;
3795               }
3796            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
3797               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3798                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3799                  ae->tag       = Ittc;
3800                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3801                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3802                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3803                  return ae;
3804               }
3805               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3806                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3807                  ae->tag       = Itcc;
3808                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3809                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3810                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3811                  return ae;
3812               }
3813            }
3814         }
3815         break;
3816
3817      case Iex_GetI:
3818         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
3819            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3820            ae->tag           = GetIt;
3821            ae->u.GetIt.descr = e->Iex.GetI.descr;
3822            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
3823            ae->u.GetIt.bias  = e->Iex.GetI.bias;
3824            return ae;
3825         }
3826         break;
3827
3828      case Iex_CCall:
3829         ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3830         ae->tag = CCall;
3831         /* Ok to share only the cee, since it is immutable. */
3832         ae->u.CCall.cee   = e->Iex.CCall.cee;
3833         ae->u.CCall.retty = e->Iex.CCall.retty;
3834         /* irExprVec_to_TmpOrConsts will assert if the args are
3835            neither tmps nor constants, but that's ok .. that's all they
3836            should be. */
3837         irExprVec_to_TmpOrConsts(
3838                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
3839                                  e->Iex.CCall.args
3840                                 );
3841         return ae;
3842
3843      case Iex_Load:
3844         /* If the caller of do_cse_BB has requested that loads also
3845            be CSEd, convert them into AvailExprs.  If not, we'll just
3846            return NULL here, and the load never becomes considered
3847            "available", which effectively disables CSEing of them, as
3848            desired. */
3849         if (allowLoadsToBeCSEd) {
3850            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3851            ae->tag        = Load;
3852            ae->u.Load.end = e->Iex.Load.end;
3853            ae->u.Load.ty  = e->Iex.Load.ty;
3854            irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
3855            return ae;
3856         }
3857         break;
3858
3859      default:
3860         break;
3861   }
3862
3863   return NULL;
3864}
3865
3866
3867/* The BB is modified in-place.  Returns True if any changes were
3868   made.  The caller can choose whether or not loads should be CSEd.
3869   In the normal course of things we don't do that, since CSEing loads
3870   is something of a dodgy proposition if the guest program is doing
3871   some screwy stuff to do with races and spinloops. */
3872
3873static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
3874{
3875   Int        i, j, paranoia;
3876   IRTemp     t, q;
3877   IRStmt*    st;
3878   AvailExpr* eprime;
3879   AvailExpr* ae;
3880   Bool       invalidate;
3881   Bool       anyDone = False;
3882
3883   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
3884   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
3885
3886   vassert(sizeof(IRTemp) <= sizeof(HWord));
3887
3888   if (0) { ppIRSB(bb); vex_printf("\n\n"); }
3889
3890   /* Iterate forwards over the stmts.
3891      On seeing "t = E", where E is one of the AvailExpr forms:
3892         let E' = apply tenv substitution to E
3893         search aenv for E'
3894            if a mapping E' -> q is found,
3895               replace this stmt by "t = q"
3896               and add binding t -> q to tenv
3897            else
3898               add binding E' -> t to aenv
3899               replace this stmt by "t = E'"
3900
3901      Other statements are only interesting to the extent that they
3902      might invalidate some of the expressions in aenv.  So there is
3903      an invalidate-bindings check for each statement seen.
3904   */
3905   for (i = 0; i < bb->stmts_used; i++) {
3906      st = bb->stmts[i];
3907
3908      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kinds of bindings are the GetI and Load kinds.
3912            Dirty call - dump (paranoia level -> 2)
3913            Store      - dump (ditto)
3914            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
3915         Uses getAliasingRelation_IC and getAliasingRelation_II
3916         to do the no-overlap assessments needed for Put/PutI.
3917      */
3918      switch (st->tag) {
3919         case Ist_Dirty: case Ist_Store: case Ist_MBE:
3920         case Ist_CAS: case Ist_LLSC:
3921         case Ist_StoreG:
3922            paranoia = 2; break;
3923         case Ist_Put: case Ist_PutI:
3924            paranoia = 1; break;
3925         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
3926         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
3927            paranoia = 0; break;
3928         default:
3929            vpanic("do_cse_BB(1)");
3930      }
3931
3932      if (paranoia > 0) {
3933         for (j = 0; j < aenv->used; j++) {
3934            if (!aenv->inuse[j])
3935               continue;
3936            ae = (AvailExpr*)aenv->key[j];
3937            if (ae->tag != GetIt && ae->tag != Load)
3938               continue;
3939            invalidate = False;
3940            if (paranoia >= 2) {
3941               invalidate = True;
3942            } else {
3943               vassert(paranoia == 1);
3944               if (ae->tag == Load) {
3945                  /* Loads can be invalidated by anything that could
3946                     possibly touch memory.  But in that case we
3947                     should have |paranoia| == 2 and we won't get
3948                     here.  So there's nothing to do; we don't have to
3949                     invalidate the load. */
3950               }
3951               else
3952               if (st->tag == Ist_Put) {
3953                  if (getAliasingRelation_IC(
3954                         ae->u.GetIt.descr,
3955                         IRExpr_RdTmp(ae->u.GetIt.ix),
3956                         st->Ist.Put.offset,
3957                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
3958                      ) != NoAlias)
3959                     invalidate = True;
3960               }
3961               else
3962               if (st->tag == Ist_PutI) {
3963                  IRPutI *puti = st->Ist.PutI.details;
3964                  if (getAliasingRelation_II(
3965                         ae->u.GetIt.descr,
3966                         IRExpr_RdTmp(ae->u.GetIt.ix),
3967                         ae->u.GetIt.bias,
3968                         puti->descr,
3969                         puti->ix,
3970                         puti->bias
3971                      ) != NoAlias)
3972                     invalidate = True;
3973               }
3974               else
3975                  vpanic("do_cse_BB(2)");
3976            }
3977
3978            if (invalidate) {
3979               aenv->inuse[j] = False;
3980               aenv->key[j]   = (HWord)NULL;  /* be sure */
3981            }
3982         } /* for j */
3983      } /* paranoia > 0 */
3984
      /* ------ END invalidate aenv bindings ------ */
3986
3987      /* ignore not-interestings */
3988      if (st->tag != Ist_WrTmp)
3989         continue;
3990
3991      t = st->Ist.WrTmp.tmp;
3992      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
3993      /* ignore if not of AvailExpr form */
3994      if (!eprime)
3995         continue;
3996
3997      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
3998
3999      /* apply tenv */
4000      subst_AvailExpr( tenv, eprime );
4001
4002      /* search aenv for eprime, unfortunately the hard way */
4003      for (j = 0; j < aenv->used; j++)
4004         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
4005            break;
4006
4007      if (j < aenv->used) {
4008         /* A binding E' -> q was found.  Replace stmt by "t = q" and
4009            note the t->q binding in tenv. */
4010         /* (this is the core of the CSE action) */
4011         q = (IRTemp)aenv->val[j];
4012         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
4013         addToHHW( tenv, (HWord)t, (HWord)q );
4014         anyDone = True;
4015      } else {
4016         /* No binding was found, so instead we add E' -> t to our
4017            collection of available expressions, replace this stmt
4018            with "t = E'", and move on. */
4019         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
4020         addToHHW( aenv, (HWord)eprime, (HWord)t );
4021      }
4022   }
4023
4024   /*
4025   ppIRSB(bb);
4026   sanityCheckIRSB(bb, Ity_I32);
4027   vex_printf("\n\n");
4028   */
4029   return anyDone;
4030}
4031
4032
4033/*---------------------------------------------------------------*/
4034/*--- Add32/Sub32 chain collapsing                            ---*/
4035/*---------------------------------------------------------------*/
4036
4037/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4038
4039/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
4040   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
4041   root node is Add32, not Sub32. */
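/* For instance (illustrative): Add32(t5,0x4:I32) sets *tmp = t5 and
   *i32 = 4, whereas Sub32(t5,0x4:I32) sets *tmp = t5 and *i32 = -4. */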
4042
4043static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
4044{
4045   if (e->tag != Iex_Binop)
4046      return False;
4047   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
4048      return False;
4049   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
4050      return False;
4051   if (e->Iex.Binop.arg2->tag != Iex_Const)
4052      return False;
4053   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
4054   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
4055   if (e->Iex.Binop.op == Iop_Sub32)
4056      *i32 = -*i32;
4057   return True;
4058}
4059
4060
4061/* Figure out if tmp can be expressed as tmp2 +32 const, for some
4062   other tmp2.  Scan backwards from the specified start point -- an
4063   optimisation. */
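/* An illustrative sketch: given the bindings

      t2 = Add32(t1,0x4:I32)
      t3 = Sub32(t2,0x1:I32)

   a query for t3 walks backwards through the chain and reports
   t3 == t1 + 3, setting *tmp2 = t1 and *i32 = 3. */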
4064
4065static Bool collapseChain ( IRSB* bb, Int startHere,
4066                            IRTemp tmp,
4067                            IRTemp* tmp2, Int* i32 )
4068{
4069   Int     j, ii;
4070   IRTemp  vv;
4071   IRStmt* st;
4072   IRExpr* e;
4073
   /* the (var, con) pair contains the current 'representation' for
      'tmp'.  We start with 'tmp + 0'.  */
4076   IRTemp var = tmp;
4077   Int    con = 0;
4078
   /* Scan backwards to see if tmp can be replaced by some other tmp
      +/- a constant. */
4081   for (j = startHere; j >= 0; j--) {
4082      st = bb->stmts[j];
4083      if (st->tag != Ist_WrTmp)
4084         continue;
4085      if (st->Ist.WrTmp.tmp != var)
4086         continue;
4087      e = st->Ist.WrTmp.data;
4088      if (!isAdd32OrSub32(e, &vv, &ii))
4089         break;
4090      var = vv;
4091      con += ii;
4092   }
4093   if (j == -1)
4094      /* no earlier binding for var .. ill-formed IR */
4095      vpanic("collapseChain");
4096
4097   /* so, did we find anything interesting? */
4098   if (var == tmp)
4099      return False; /* no .. */
4100
4101   *tmp2 = var;
4102   *i32  = con;
4103   return True;
4104}
4105
4106
4107/* ------- Main function for Add32/Sub32 chain collapsing ------ */
4108
4109static void collapse_AddSub_chains_BB ( IRSB* bb )
4110{
4111   IRStmt *st;
4112   IRTemp var, var2;
4113   Int    i, con, con2;
4114
4115   for (i = bb->stmts_used-1; i >= 0; i--) {
4116      st = bb->stmts[i];
4117      if (st->tag == Ist_NoOp)
4118         continue;
4119
4120      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
4121
4122      if (st->tag == Ist_WrTmp
4123          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
4124
4125         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
4126            Find out if var can be expressed as var2 + con2. */
4127         if (collapseChain(bb, i-1, var, &var2, &con2)) {
4128            if (DEBUG_IROPT) {
4129               vex_printf("replacing1 ");
4130               ppIRStmt(st);
4131               vex_printf(" with ");
4132            }
4133            con2 += con;
4134            bb->stmts[i]
4135               = IRStmt_WrTmp(
4136                    st->Ist.WrTmp.tmp,
4137                    (con2 >= 0)
4138                      ? IRExpr_Binop(Iop_Add32,
4139                                     IRExpr_RdTmp(var2),
4140                                     IRExpr_Const(IRConst_U32(con2)))
4141                      : IRExpr_Binop(Iop_Sub32,
4142                                     IRExpr_RdTmp(var2),
4143                                     IRExpr_Const(IRConst_U32(-con2)))
4144                 );
4145            if (DEBUG_IROPT) {
4146               ppIRStmt(bb->stmts[i]);
4147               vex_printf("\n");
4148            }
4149         }
4150
4151         continue;
4152      }
4153
4154      /* Try to collapse 't1 = GetI[t2, con]'. */
4155
4156      if (st->tag == Ist_WrTmp
4157          && st->Ist.WrTmp.data->tag == Iex_GetI
4158          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
4159          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
4160                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
4161         if (DEBUG_IROPT) {
4162            vex_printf("replacing3 ");
4163            ppIRStmt(st);
4164            vex_printf(" with ");
4165         }
4166         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
4167         bb->stmts[i]
4168            = IRStmt_WrTmp(
4169                 st->Ist.WrTmp.tmp,
4170                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
4171                             IRExpr_RdTmp(var2),
4172                             con2));
4173         if (DEBUG_IROPT) {
4174            ppIRStmt(bb->stmts[i]);
4175            vex_printf("\n");
4176         }
4177         continue;
4178      }
4179
      /* Perhaps st is PutI[t, con] ?  Check the tag before reading
         the PutI union member. */
      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;
         if (puti->ix->tag == Iex_RdTmp
             && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
                              &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing2 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += puti->bias;
            bb->stmts[i]
               = IRStmt_PutI(mkIRPutI(puti->descr,
                                      IRExpr_RdTmp(var2),
                                      con2,
                                      puti->data));
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }
         continue;
      }
4203
4204   } /* for */
4205}
4206
4207
4208/*---------------------------------------------------------------*/
4209/*--- PutI/GetI transformations                               ---*/
4210/*---------------------------------------------------------------*/
4211
/* Given the parts (descr, ix, bias) for a GetI, scan backwards from
   the given starting point to find, if there is one, a PutI which
   writes exactly the same piece of guest state, and if so return the
   expression that the PutI writes.  This is the core of PutI-GetI
   forwarding. */
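/* An illustrative sketch: in

      PUTI(72:8xF64)[t1,0] = t7
      ...
      t9 = GETI(72:8xF64)[t1,0]

   scanning backwards from the GetI establishes ExactAlias with the
   PutI, so t7 is returned -- provided nothing in between might have
   written the same piece of guest state. */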
4216
4217static
4218IRExpr* findPutI ( IRSB* bb, Int startHere,
4219                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
4220{
4221   Int        j;
4222   IRStmt*    st;
4223   GSAliasing relation;
4224
4225   if (0) {
4226      vex_printf("\nfindPutI ");
4227      ppIRRegArray(descrG);
4228      vex_printf(" ");
4229      ppIRExpr(ixG);
4230      vex_printf(" %d\n", biasG);
4231   }
4232
4233   /* Scan backwards in bb from startHere to find a suitable PutI
4234      binding for (descrG, ixG, biasG), if any. */
4235
4236   for (j = startHere; j >= 0; j--) {
4237      st = bb->stmts[j];
4238      if (st->tag == Ist_NoOp)
4239         continue;
4240
4241      if (st->tag == Ist_Put) {
4242         /* Non-indexed Put.  This can't give a binding, but we do
4243            need to check it doesn't invalidate the search by
4244            overlapping any part of the indexed guest state. */
4245
4246         relation
4247            = getAliasingRelation_IC(
4248                 descrG, ixG,
4249                 st->Ist.Put.offset,
4250                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
4251
4252         if (relation == NoAlias) {
4253            /* we're OK; keep going */
4254            continue;
4255         } else {
4256            /* relation == UnknownAlias || relation == ExactAlias */
4257            /* If this assertion fails, we've found a Put which writes
4258               an area of guest state which is read by a GetI.  Which
4259               is unlikely (although not per se wrong). */
4260            vassert(relation != ExactAlias);
4261            /* This Put potentially writes guest state that the GetI
4262               reads; we must fail. */
4263            return NULL;
4264         }
4265      }
4266
4267      if (st->tag == Ist_PutI) {
4268         IRPutI *puti = st->Ist.PutI.details;
4269
4270         relation = getAliasingRelation_II(
4271                       descrG, ixG, biasG,
4272                       puti->descr,
4273                       puti->ix,
4274                       puti->bias
4275                    );
4276
4277         if (relation == NoAlias) {
4278            /* This PutI definitely doesn't overlap.  Ignore it and
4279               keep going. */
4280            continue; /* the for j loop */
4281         }
4282
4283         if (relation == UnknownAlias) {
            /* We don't know whether this PutI writes to the same
               guest state that the GetI reads.  So we have to give
               up. */
4286            return NULL;
4287         }
4288
4289         /* Otherwise, we've found what we're looking for.  */
4290         vassert(relation == ExactAlias);
4291         return puti->data;
4292
4293      } /* if (st->tag == Ist_PutI) */
4294
4295      if (st->tag == Ist_Dirty) {
4296         /* Be conservative.  If the dirty call has any guest effects at
4297            all, give up.  We could do better -- only give up if there
4298            are any guest writes/modifies. */
4299         if (st->Ist.Dirty.details->nFxState > 0)
4300            return NULL;
4301      }
4302
4303   } /* for */
4304
4305   /* No valid replacement was found. */
4306   return NULL;
4307}
4308
4309
4310
4311/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4312   that it writes exactly the same piece of guest state) ?  Safe
4313   answer: False. */
4314
4315static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4316{
4317   vassert(pi->tag == Ist_PutI);
4318   if (s2->tag != Ist_PutI)
4319      return False;
4320
4321   IRPutI *p1 = pi->Ist.PutI.details;
4322   IRPutI *p2 = s2->Ist.PutI.details;
4323
4324   return toBool(
4325          getAliasingRelation_II(
4326             p1->descr, p1->ix, p1->bias,
4327             p2->descr, p2->ix, p2->bias
4328          )
4329          == ExactAlias
4330          );
4331}
4332
4333
4334/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4335   overlap it?  Safe answer: True.  Note, we could do a lot better
4336   than this if needed. */
4337
4338static
4339Bool guestAccessWhichMightOverlapPutI (
4340        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
4341     )
4342{
4343   GSAliasing relation;
4344   UInt       minoffP, maxoffP;
4345
4346   vassert(pi->tag == Ist_PutI);
4347
4348   IRPutI *p1 = pi->Ist.PutI.details;
4349
4350   getArrayBounds(p1->descr, &minoffP, &maxoffP);
4351   switch (s2->tag) {
4352
4353      case Ist_NoOp:
4354      case Ist_IMark:
4355         return False;
4356
4357      case Ist_MBE:
4358      case Ist_AbiHint:
4359         /* just be paranoid ... these should be rare. */
4360         return True;
4361
4362      case Ist_CAS:
4363         /* This is unbelievably lame, but it's probably not
4364            significant from a performance point of view.  Really, a
4365            CAS is a load-store op, so it should be safe to say False.
4366            However .. */
4367         return True;
4368
4369      case Ist_Dirty:
4370         /* If the dirty call has any guest effects at all, give up.
4371            Probably could do better. */
4372         if (s2->Ist.Dirty.details->nFxState > 0)
4373            return True;
4374         return False;
4375
4376      case Ist_Put:
4377         vassert(isIRAtom(s2->Ist.Put.data));
4378         relation
4379            = getAliasingRelation_IC(
4380                 p1->descr, p1->ix,
4381                 s2->Ist.Put.offset,
4382                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
4383              );
4384         goto have_relation;
4385
4386      case Ist_PutI: {
4387         IRPutI *p2 = s2->Ist.PutI.details;
4388
4389         vassert(isIRAtom(p2->ix));
4390         vassert(isIRAtom(p2->data));
4391         relation
4392            = getAliasingRelation_II(
4393                 p1->descr, p1->ix, p1->bias,
4394                 p2->descr, p2->ix, p2->bias
4395              );
4396         goto have_relation;
4397      }
4398
4399      case Ist_WrTmp:
4400         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
4401            relation
4402               = getAliasingRelation_II(
4403                    p1->descr, p1->ix, p1->bias,
4404                    s2->Ist.WrTmp.data->Iex.GetI.descr,
4405                    s2->Ist.WrTmp.data->Iex.GetI.ix,
4406                    s2->Ist.WrTmp.data->Iex.GetI.bias
4407                 );
4408            goto have_relation;
4409         }
4410         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
4411            relation
4412               = getAliasingRelation_IC(
4413                    p1->descr, p1->ix,
4414                    s2->Ist.WrTmp.data->Iex.Get.offset,
4415                    s2->Ist.WrTmp.data->Iex.Get.ty
4416                 );
4417            goto have_relation;
4418         }
4419         return False;
4420
4421      case Ist_Store:
4422         vassert(isIRAtom(s2->Ist.Store.addr));
4423         vassert(isIRAtom(s2->Ist.Store.data));
4424         return False;
4425
4426      default:
4427         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
4428         vpanic("guestAccessWhichMightOverlapPutI");
4429   }
4430
4431  have_relation:
4432   if (relation == NoAlias)
4433      return False;
4434   else
4435      return True; /* ExactAlias or UnknownAlias */
4436}
4437
4438
4439
4440/* ---------- PutI/GetI transformations main functions --------- */
4441
4442/* Remove redundant GetIs, to the extent that they can be detected.
4443   bb is modified in-place. */

static
void do_redundant_GetI_elimination ( IRSB* bb )
{
   Int     i;
   IRStmt* st;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
         if (replacement
             && isIRAtom(replacement)
             /* Make sure we're doing a type-safe transformation! */
             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
            if (DEBUG_IROPT) {
               vex_printf("rGI:  ");
               ppIRExpr(st->Ist.WrTmp.data);
               vex_printf(" -> ");
               ppIRExpr(replacement);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
         }
      }
   }

}


/* Remove redundant PutIs, to the extent that they can be detected.
   bb is modified in-place. */
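
/* As an illustrative sketch (temp names invented), in

      PUTI(descr)[t0,0] = t1
      ... no overlapping access, exit or dirty call ...
      PUTI(descr)[t0,0] = t2

   the first PutI is overwritten before it can be observed, so it is
   replaced by a NoOp. */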

static
void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
{
   Int    i, j;
   Bool   delete;
   IRStmt *st, *stj;

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      if (st->tag != Ist_PutI)
         continue;
      /* Ok, search forwards from here to see if we can find another
         PutI which makes this one redundant, while dodging various
         hazards.  Search forwards:
         * If conditional exit, give up (because anything after that
           does not postdominate this put).
         * If a Get which might overlap, give up (because this PutI
           is not necessarily dead).
         * If a PutI which is identical, stop with success.
         * If a Put which might overlap, but is not identical, give up.
         * If a dirty helper call which might write guest state, give up.
         * If a Put which definitely doesn't overlap, or any other
           kind of stmt, continue.
      */
      delete = False;
      for (j = i+1; j < bb->stmts_used; j++) {
         stj = bb->stmts[j];
         if (stj->tag == Ist_NoOp)
            continue;
         if (identicalPutIs(st, stj)) {
            /* success! */
            delete = True;
            break;
         }
         if (stj->tag == Ist_Exit)
            /* give up */
            break;
         if (stj->tag == Ist_Dirty)
            /* give up; could do better here */
            break;
         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
            /* give up */
            break;
      }

      if (delete) {
         if (DEBUG_IROPT) {
            vex_printf("rPI:  ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }

   }
}


/*---------------------------------------------------------------*/
/*--- Loop unrolling                                          ---*/
/*---------------------------------------------------------------*/

/* Adjust all tmp values (names) in e by delta.  e is destructively
   modified. */

static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}

/* Adjust all tmp values (names) in st by delta.  st is destructively
   modified. */

static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}


/* If possible, return a loop-unrolled version of bb0.  bb0 itself is
   left unchanged; the unrolling is done on a deep copy.  If not
   possible, return NULL.  */

/* The two schemas considered are:

     X: BODY; goto X

     which unrolls to (e.g.)  X: BODY;BODY; goto X

   and

       X: BODY; if (c) goto X; goto Y
   which trivially transforms to
       X: BODY; if (!c) goto Y; goto X;
   so it falls in the scope of the first case.

   X and Y must be literal (guest) addresses.
*/
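
/* For instance (addresses invented for illustration): a block at
   guest address 0x1000 ending in

      if (c) goto 0x1000; goto 0x2000

   is rewritten as

      if (!c) goto 0x2000; goto 0x1000

   which is idiom #1, and can then be unrolled by appending renamed
   copies of the whole block (see deltaIRStmt above for the
   renaming). */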

static Int calc_unroll_factor( IRSB* bb )
{
   Int n_stmts, i;

   n_stmts = 0;
   for (i = 0; i < bb->stmts_used; i++) {
      if (bb->stmts[i]->tag != Ist_NoOp)
         n_stmts++;
   }

   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 8 * n_stmts);
      return 8;
   }
   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 4 * n_stmts);
      return 4;
   }

   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 2 * n_stmts);
      return 2;
   }

   if (vex_control.iropt_verbosity > 0)
      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);

   return 1;
}
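
/* For example (threshold value chosen purely for illustration): with
   vex_control.iropt_unroll_thresh == 120, a loop body of 15 non-NoOp
   stmts is unrolled 8x (15 <= 120/8), a body of 25 stmts 4x
   (25 <= 120/4), and a body of 50 stmts 2x (50 <= 120/2); anything
   larger is left alone. */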


static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends in a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms have by now --- */
   /* --- been converted to idiom 1.                    --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a completely fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%lx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}


/*---------------------------------------------------------------*/
/*--- The tree builder                                        ---*/
/*---------------------------------------------------------------*/

/* This isn't part of IR optimisation.  Really it's a pass done prior
   to instruction selection, which improves the code that the
   instruction selector can produce. */

/* --- The 'tmp' environment is the central data structure here --- */

/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  8 gives reasonable performance
   under most circumstances. */
#define A_NENV 10

/* An interval.  Used to record the bytes in the guest state accessed
   by a Put[I] statement or by (one or more) Get[I] expression(s).  In
   the case of several Get[I] expressions, the lower/upper bounds are
   recorded.  This is conservative but cheap.
   E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
   offset 200 would be recorded as [100,203]. */
typedef
   struct {
      Bool present;
      Int  low;
      Int  high;
   }
   Interval;

static inline Bool
intervals_overlap(Interval i1, Interval i2)
{
   return (i1.low >= i2.low && i1.low <= i2.high) ||
          (i2.low >= i1.low && i2.low <= i1.high);
}
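
/* E.g. [100,107] and [104,111] overlap, whereas [100,107] and
   [108,111] do not. */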

static inline void
update_interval(Interval *i, Int low, Int high)
{
   vassert(low <= high);

   if (i->present) {
      if (low  < i->low)  i->low  = low;
      if (high > i->high) i->high = high;
   } else {
      i->present = True;
      i->low  = low;
      i->high = high;
   }
}


/* bindee == NULL   ===  slot is not in use
   bindee != NULL   ===  slot is in use
*/
typedef
   struct {
      IRTemp  binder;
      IRExpr* bindee;
      Bool    doesLoad;
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;

__attribute__((unused))
static void ppAEnv ( ATmpInfo* env )
{
   Int i;
   for (i = 0; i < A_NENV; i++) {
      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
      if (env[i].bindee)
         ppIRExpr(env[i].bindee);
      else
         vex_printf("(null)");
      vex_printf("\n");
   }
}

/* --- Tree-traversal fns --- */

/* Traverse an expr, and detect if any part of it reads memory or does
   a Get.  Be careful ... this really controls how much the
   tree-builder can reorder the code, so getting it right is critical.
*/
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}


/* Add a binding to the front of the env and slide all the rest
   backwards.  It should be the case that the last slot is free. */
static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
{
   Int i;
   vassert(env[A_NENV-1].bindee == NULL);
   for (i = A_NENV-1; i >= 1; i--)
      env[i] = env[i-1];
   env[0].binder   = binder;
   env[0].bindee   = bindee;
   env[0].doesLoad = False; /* filled in later */
   env[0].getInterval.present  = False; /* filled in later */
   env[0].getInterval.low  = -1; /* filled in later */
   env[0].getInterval.high = -1; /* filled in later */
}

/* Given uses :: array of UShort, indexed by IRTemp.
   Count the use-occurrences of temps in this expression,
   accumulating them into uses[].
*/
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int i;

   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_Const:
      case Iex_Get:
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
   }
}


/* Given uses :: array of UShort, indexed by IRTemp.
   Count the use-occurrences of temps in this statement,
   accumulating them into uses[].
*/
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}

/* Look up a binding for tmp in the env.  If found, return the bound
   expression, and set the env's binding to NULL so it is marked as
   used.  If not found, return NULL. */

static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
{
   Int i;
   for (i = 0; i < A_NENV; i++) {
      if (env[i].binder == tmp && env[i].bindee != NULL) {
         IRExpr* bindee = env[i].bindee;
         env[i].bindee = NULL;
         return bindee;
      }
   }
   return NULL;
}

/* Traverse e, looking for temps.  For each observed temp, see if env
   contains a binding for the temp, and if so return the bound value.
   The env has the property that any binding it holds is
   'single-shot', so once a binding is used, it is marked as no longer
   available, by setting its .bindee field to NULL. */

static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Unop && e->Iex.Unop.op == op;
}
static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Binop && e->Iex.Binop.op == op;
}

static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   switch (op) {
   case Iop_Or32:
      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
                                                     a2->Iex.Unop.arg ) );
      break;

   case Iop_CmpNE32:
      /* Since X has type Ity_I1 we can simplify:
         CmpNE32(1Uto32(X),0) ==> X */
      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
         return a1->Iex.Unop.arg;
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Binop( op, a1, a2 );
}

static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_Left64:
      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
      break;
   case Iop_ZeroHI64ofV128:
      /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
      if (is_Unop(aa, Iop_ZeroHI64ofV128))
         return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh )))
             --> Shr64( 32Uto64(64to32(x)), sh ) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh )))
         --> 32Uto64(64to32( Shl64(                x,   sh ))) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}

static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
                );
      case Iex_Binop:
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}

/* Same deal as atbSubst_Expr, except for stmts. */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}

inline
static Bool dirty_helper_stores ( const IRDirty *d )
{
   return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
}

inline
static Interval dirty_helper_puts (
                   const IRDirty *d,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Int i;
   Interval interval;

   /* Passing the guest state pointer opens the door to modifying the
      guest state under the covers.  It's not allowed, but let's be
      extra conservative and assume the worst. */
   for (i = 0; d->args[i]; i++) {
      if (UNLIKELY(d->args[i]->tag == Iex_GSPTR)) {
         *requiresPreciseMemExns = True;
         /* Assume all guest state is written. */
         interval.present = True;
         interval.low  = 0;
         interval.high = 0x7FFFFFFF;
         return interval;
      }
   }

   /* Check the side effects on the guest state */
   interval.present = False;
   interval.low = interval.high = -1;
   *requiresPreciseMemExns = False;

   for (i = 0; i < d->nFxState; ++i) {
      if (d->fxState[i].fx != Ifx_Read) {
         Int offset = d->fxState[i].offset;
         Int size = d->fxState[i].size;
         Int nRepeats = d->fxState[i].nRepeats;
         Int repeatLen = d->fxState[i].repeatLen;

         if (preciseMemExnsFn(offset,
                              offset + nRepeats * repeatLen + size - 1,
                              pxControl)) {
            *requiresPreciseMemExns = True;
         }
         update_interval(&interval, offset,
                         offset + nRepeats * repeatLen + size - 1);
      }
   }

   return interval;
}

/* Return an interval if st modifies the guest state.  Via
   requiresPreciseMemExns return whether or not that modification
   requires precise exceptions. */
static Interval stmt_modifies_guest_state (
                   IRSB *bb, const IRStmt *st,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Interval interval;

   switch (st->tag) {
   case Ist_Put: {
      Int offset = st->Ist.Put.offset;
      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));

      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + size - 1, pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + size - 1;
      return interval;
   }

   case Ist_PutI: {
      IRRegArray *descr = st->Ist.PutI.details->descr;
      Int offset = descr->base;
      Int size = sizeofIRType(descr->elemTy);

      /* We quietly assume here that all segments are contiguous and there
         are no holes. This is to avoid a loop. The assumption is conservative
         in the sense that we might report that precise memory exceptions are
         needed when in fact they are not. */
      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
                            pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + descr->nElems * size - 1;
      return interval;
   }

   case Ist_Dirty:
      return dirty_helper_puts(st->Ist.Dirty.details,
                               preciseMemExnsFn, pxControl,
                               requiresPreciseMemExns);

   default:
      *requiresPreciseMemExns = False;
      interval.present = False;
      interval.low  = -1;
      interval.high = -1;
      return interval;
   }
}

/* notstatic */ Addr ado_treebuild_BB (
                        IRSB* bb,
                        Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                        VexRegisterUpdates pxControl
                     )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Bool   max_ga_known = False;
   Addr   max_ga       = 0;

   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            UInt len = st->Ist.IMark.len;
            Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the oldest (end) element.  This
         guarantees there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
              (doesLoad? getInterval?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' bindings invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
              by sliding entries towards the front

      Finally, apply env to bb->next.
   */
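
   /* As a sketch (invented temps), given

         t1 = GET:I32(0)
         t2 = Add32(t1,0x1:I32)
         PUT(0) = t2

      with occ(t1) == occ(t2) == 1, each binding is used up by the
      stmt that references it, and the single tree

         PUT(0) = Add32(GET:I32(0),0x1:I32)

      is emitted. */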

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i as
      the cursor for the current stmt being examined, and j <= i as
      the cursor for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the above-mentioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (it is always safe to err on
         the side of True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns
                    );

      /* stmtStores is True if this stmt writes memory, or might do
         so (==> we don't want to reorder other loads or stores
         relative to it).  Also, both LL and SC fall under this
         classification, since we really ought to be conservative and
         not reorder any other memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data.  That means, in
                 essence, that a load expression cannot be substituted
                 into a statement that follows the put.  But there is
                 nothing wrong with doing so, except when the put
                 statement requires precise exceptions.  Think of a
                 load that is moved past a put where the put updates
                 the IP in the guest state.  If the load generates a
                 segfault, the wrong address (line number) would be
                 reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      for (; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left-over bindings?  Or any left-over bindings are by definition
      dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr)0;
}


/*---------------------------------------------------------------*/
/*--- MSVC specific transformation hacks                      ---*/
/*---------------------------------------------------------------*/

/* The purpose of all this is to find MSVC's idiom for non-constant
   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   doesn't understand that XORing an undefined bit with itself gives a
   defined result.

   This isn't a problem for the simple case "x ^ x", because iropt
   folds it to a constant zero before Memcheck ever sees it.  But in
   this case we have an intervening "& c" which defeats the simple
   case.  So we have to carefully inspect all expressions rooted at an
   XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   7 other variants resulting from swapping the order of arguments to
   the three binary operations.  If we get a match, we then replace
   the tree with "(a & ~c) | (b & c)", and Memcheck is happy.

   The key difficulty is to spot the two uses of "a".  To normalise
   the IR to maximise the chances of success, we first do a CSE pass,
   with CSEing of loads enabled, since the two "a" expressions may be
   loads, which need to be commoned up.  Then we do a constant folding
   pass, so as to remove any tmp-to-tmp assignment chains that would
   make matching the original expression more difficult.
*/
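
/* A quick sanity check on 4-bit values (purely illustrative): with
   a = 1010, b = 0110 and c = 0011,

      a ^ ((a ^ b) & c)  =  1010 ^ (1100 & 0011)  =  1010 ^ 0000  =  1010
      (a & ~c) | (b & c) =  (1010 & 1100) | (0110 & 0011)
                         =  1000 | 0010  =  1010

   Both forms take b's bits where c is 1 and a's bits where c is 0;
   the gcc form just never XORs a bit with itself. */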


/* Helper function for debug printing */
__attribute__((unused))
static void print_flat_expr ( IRExpr** env, IRExpr* e )
{
   if (e == NULL) {
      vex_printf("?");
      return;
   }
   switch (e->tag) {
      case Iex_Binop: {
         ppIROp(e->Iex.Binop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Binop.arg1);
         vex_printf(",");
         print_flat_expr(env, e->Iex.Binop.arg2);
         vex_printf(")");
         break;
      }
      case Iex_Unop: {
         ppIROp(e->Iex.Unop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Unop.arg);
         vex_printf(")");
         break;
      }
      case Iex_RdTmp:
         ppIRTemp(e->Iex.RdTmp.tmp);
         vex_printf("=");
         print_flat_expr(env, chase1(env, e));
         break;
      case Iex_Const:
      case Iex_CCall:
      case Iex_Load:
      case Iex_ITE:
      case Iex_Get:
         ppIRExpr(e);
         break;
      default:
         vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
         vassert(0);
   }
}

/* Spot   a ^ ((a ^ b) & c)   for a,b and c tmp-or-const (atoms)
   or any of the other 7 variants generated by switching the order
   of arguments to the outer ^, the inner ^ and the &.
*/
static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     /*OUT*/IRExpr** cc,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)
{
#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
                              && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)    isIRAtom(_e)
#  define STEP(_e)      chase1(env, (_e))
#  define LL(_e)        ((_e)->Iex.Binop.arg1)
#  define RR(_e)        ((_e)->Iex.Binop.arg2)
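
   /* Shorthands for the matching below (descriptive only): ISBIN
      tests whether an expression is a binop with the given op, ISATOM
      tests for a tmp-or-const, STEP follows a RdTmp one level through
      the tmp->expr environment (via chase1), and LL/RR select a
      binop's first and second arguments. */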

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                        -----and------ */
   /*                         --xor---      */
   /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = LL(e);
   and  = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v34;
   xor  = STEP(LL(and));
   c    = RR(and);
   if (!ISBIN(xor, opXOR)) goto v34;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 1;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 2;
   }

  v34:
   /*                   -----and------      */
   /*                    --xor---           */
   /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = RR(e);
   and  = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto v56;
   xor  = STEP(LL(and));
   c    = RR(and);
   if (!ISBIN(xor, opXOR)) goto v56;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 3;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 4;
   }

  v56:
   /*                        -----and------ */
   /*                         --xor---      */
   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = LL(e);
   and  = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v78;
   xor  = STEP(RR(and));
   c    = LL(and);
   if (!ISBIN(xor, opXOR)) goto v78;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      vassert(5-5); // ATC (deliberately fails: variant not yet seen in practice)
      return 5;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      vassert(6-6); // ATC (deliberately fails: variant not yet seen in practice)
      return 6;
   }

  v78:
   /*                   -----and------      */
   /*                    --xor---           */
   /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = RR(e);
   and  = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto fail;
   xor  = STEP(RR(and));
   c    = LL(and);
   if (!ISBIN(xor, opXOR)) goto fail;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 7;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 8;
   }

  fail:
   return 0;

#  undef ISBIN
#  undef ISATOM
#  undef STEP
#  undef LL
#  undef RR
}

/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
   variants thereof generated by switching arguments around), return
   the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
{
   if (e->tag != Iex_Binop)
      return NULL;

   const HChar* tyNm = NULL;
   IROp   opOR  = Iop_INVALID;
   IROp   opAND = Iop_INVALID;
   IROp   opNOT = Iop_INVALID;
   IROp   opXOR = Iop_INVALID;
   switch (e->Iex.Binop.op) {
      case Iop_Xor32:
         tyNm  = "I32";
         opOR  = Iop_Or32;  opAND = Iop_And32;
         opNOT = Iop_Not32; opXOR = Iop_Xor32;
         break;
      case Iop_Xor16:
         tyNm  = "I16";
         opOR  = Iop_Or16;  opAND = Iop_And16;
         opNOT = Iop_Not16; opXOR = Iop_Xor16;
         break;
      case Iop_Xor8:
         tyNm  = "I8";
         opOR  = Iop_Or8;  opAND = Iop_And8;
         opNOT = Iop_Not8; opXOR = Iop_Xor8;
         break;
      default:
         return NULL;
   }

   IRExpr* aa = NULL;
   IRExpr* bb = NULL;
   IRExpr* cc = NULL;
   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   if (variant > 0) {
      static UInt ctr = 0;
      if (0)
         vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                    "type %s, variant %u\n",
                    ++ctr, tyNm, variant);
      /* It's vitally important that the returned aa, bb and cc are
         atoms -- either constants or tmps.  If they are anything else
         (eg, a GET) then incorporating them in a tree at this point
         in the SB may erroneously pull them forwards (eg, ahead of a
         PUT that originally came after the GET) and so transform the
         IR wrongly.  spotBitfieldAssignment should guarantee only to
         give us atoms, but we check here anyway. */
      vassert(aa && isIRAtom(aa));
      vassert(bb && isIRAtom(bb));
      vassert(cc && isIRAtom(cc));
      return IRExpr_Binop(
                opOR,
                IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
                IRExpr_Binop(opAND, bb, cc)
             );
   }
   return NULL;
}


/* SB is modified in-place.  Visit all the IRExprs and, for those
   which are allowed to be non-atomic, perform the XOR transform if
   possible.  This makes |sb| non-flat, but that's OK; the caller
   can re-flatten it.  Returns True iff any changes were made. */
static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
{
   Int  i;
   Bool changed = False;

   /* Make the tmp->expr environment, so we can use it for
      chasing expressions. */
   Int      n_tmps = sb->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];
      if (st->tag != Ist_WrTmp)
         continue;
      IRTemp t = st->Ist.WrTmp.tmp;
      vassert(t >= 0 && t < n_tmps);
      env[t] = st->Ist.WrTmp.data;
   }

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];

      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI: {
            IRPutI* puti = st->Ist.PutI.details;
            vassert(isIRAtom(puti->ix));
            vassert(isIRAtom(puti->data));
            break;
         }
         case Ist_WrTmp: {
            /* This is the one place where an expr (st->Ist.WrTmp.data) is
               allowed to be more than just a constant or a tmp. */
            IRExpr* mb_new_data
               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
            if (mb_new_data) {
               //ppIRSB(sb);
               st->Ist.WrTmp.data = mb_new_data;
               //ppIRSB(sb);
               changed = True;
            }
            break;
         }
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS: {
            IRCAS* cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         }
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty: {
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               vassert(isIRAtom(d->mAddr));
            }
            vassert(isIRAtom(d->guard));
            for (Int j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
                  vassert(isIRAtom(arg));
               }
            }
            break;
         }
         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
            vex_printf("\n"); ppIRStmt(st);
            vpanic("do_XOR_TRANSFORM_IRSB");
      }
   }

   vassert(isIRAtom(sb->next));
   return changed;
}


static IRSB* do_MSVC_HACKS ( IRSB* sb )
{
   // Normalise as much as we can.  This is the one-and-only place
   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   if (any_cse_changes) {
      // CSEing might have created dead code.  Remove it.
      sb = cprop_BB ( sb );
      do_deadcode_BB(sb);
   }

   // Visit all the IRExprs and do the transformation proper.  sb is
   // modified in-place.
   Bool changed = do_XOR_TRANSFORM_IRSB(sb);

   if (changed) {
      // The transformation generates non-flat expressions, so we now
      // need to re-flatten the block.
      sb = flatten_BB(sb);
   }

   return sb;
}


/*---------------------------------------------------------------*/
/*--- iropt main                                              ---*/
/*---------------------------------------------------------------*/

static Bool iropt_verbose = False; /* True; */


/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   redundant Put removal, constant propagation, dead code removal,
   clean helper specialisation, and dead code removal (again).
*/


static
IRSB* cheap_transformations (
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl
      )
{
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );
      ppIRSB(bb);
   }

   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   }
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );
      ppIRSB(bb);
   }

   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );
      ppIRSB(bb);
   }

   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );
      ppIRSB(bb);
   }

   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd \n\n" );
      ppIRSB(bb);
   }

   return bb;
}


/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and PutI.  */

static
IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
{
   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   }
   do_deadcode_BB( bb );
   return bb;
}


/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries
*/

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}


/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/
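
/* For instance, to "modify" one argument of a possibly-shared binop
   node, build a fresh node rather than assigning into the old one.
   A minimal sketch (this helper is illustrative only and is not part
   of this file):

      static IRExpr* binopWithNewArg2 ( IRExpr* bin, IRExpr* newArg2 )
      {
         vassert(bin->tag == Iex_Binop);
         return IRExpr_Binop(bin->Iex.Binop.op,
                             bin->Iex.Binop.arg1, newArg2);
      }
*/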


IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END   MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}


/*---------------------------------------------------------------*/
/*--- end                                            ir_opt.c ---*/
/*---------------------------------------------------------------*/
