1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*---------------------------------------------------------------*/
4/*--- begin                                          ir_opt.c ---*/
5/*---------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2004-2013 OpenWorks LLP
12      info@open-works.net
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27   02110-1301, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37#include "libvex_basictypes.h"
38#include "libvex_ir.h"
39#include "libvex.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "ir_opt.h"
44
45
46/* Set to 1 for lots of debugging output. */
47#define DEBUG_IROPT 0
48
49/* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
50#define STATS_IROPT 0
51
52
53/* What iropt does, 29 Dec 04.
54
55   It takes an IRSB and produces a new one with the same meaning,
56   defined thus:
57
58   After execution of the new BB, all guest state and guest memory is
59   the same as after execution of the original.  This is true
60   regardless of how the block was exited (at the end vs side exit).
61
62   In addition, parts of the guest state will be identical to that
63   created by execution of the original at the following observation
64   points:
65
66   * In a dirty helper call, any parts of the guest state that the
67     helper states that it reads or modifies will be up to date.
68     Also, guest memory will be up to date.  Parts of the guest state
69     not marked as being read or modified by the helper cannot be
70     assumed to be up-to-date at the point where the helper is called.
71
72   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
74     (i.e. at SB exits and as specified by dirty helper call).
75     Also, the stack pointer register is up to date at memory
76     exception points (as this is needed for the stack extension
77     logic in m_signals.c).
78
79   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
80     Immediately prior to any load or store, those parts of the guest
81     state marked as requiring precise exceptions will be up to date.
82     Also, guest memory will be up to date.  Parts of the guest state
83     not marked as requiring precise exceptions cannot be assumed to
84     be up-to-date at the point of the load/store.
85
86   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
87     Same as minimal, but all the guest state is up to date at memory
88     exception points.
89
90   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
91     Guest state is up to date at each instruction.
92
93   The relative order of loads and stores (including loads/stores of
94   guest memory done by dirty helpers annotated as such) is not
95   changed.  However, the relative order of loads with no intervening
96   stores/modifies may be changed.
97
98   Transformation order
99   ~~~~~~~~~~~~~~~~~~~~
100
101   There are three levels of optimisation, controlled by
102   vex_control.iropt_level.  Define first:
103
104   "Cheap transformations" are the following sequence:
105      * Redundant-Get removal
106      * Redundant-Put removal
107      * Constant propagation/folding
108      * Dead code removal
109      * Specialisation of clean helper functions
110      * Dead code removal
111
112   "Expensive transformations" are the following sequence:
113      * CSE
114      * Folding of add/sub chains
115      * Redundant-GetI removal
116      * Redundant-PutI removal
117      * Dead code removal
118
119   Then the transformations are as follows, as defined by
120   vex_control.iropt_level:
121
122   Level 0:
123      * Flatten into atomic form.
124
125   Level 1: the following sequence:
126      * Flatten into atomic form.
127      * Cheap transformations.
128
129   Level 2: the following sequence
130      * Flatten into atomic form.
131      * Cheap transformations.
132      * If block contains any floating or vector types, CSE.
133      * If block contains GetI or PutI, Expensive transformations.
134      * Try unrolling loops.  Three possible outcomes:
135        - No effect: do nothing more.
136        - Unrolled a loop, and block does not contain GetI or PutI:
137          Do: * CSE
138              * Dead code removal
139        - Unrolled a loop, and block contains GetI or PutI:
140          Do: * Expensive transformations
141              * Cheap transformations
142*/
143
144/* Implementation notes, 29 Dec 04.
145
146   TODO (important): I think rPutI removal ignores precise exceptions
147   and is therefore in a sense, wrong.  In the sense that PutIs are
148   assumed not to write parts of the guest state that we need to have
149   up-to-date at loads/stores.  So far on x86 guest that has not
150   mattered since indeed only the x87 FP registers and tags are
151   accessed using GetI/PutI, and there is no need so far for them to
152   be up to date at mem exception points.  The rPutI pass should be
153   fixed.
154
155   TODO: improve pessimistic handling of precise exceptions
156     in the tree builder.
157
158   TODO: check interaction of rGetI and dirty helpers.
159
160   F64i constants are treated differently from other constants.
161   They are not regarded as atoms, and instead lifted off and
162   bound to temps.  This allows them to participate in CSE, which
163   is important for getting good performance for x86 guest code.
164
165   CSE up F64 literals (already doing F64is)
166
167   CSE: consider carefully the requirement for precise exns
168        prior to making CSE any more aggressive.  */
169
170
171/*---------------------------------------------------------------*/
172/*--- Finite mappery, of a sort                               ---*/
173/*---------------------------------------------------------------*/
174
/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */
178
179typedef
180   struct {
181      Bool*  inuse;
182      HWord* key;
183      HWord* val;
184      Int    size;
185      Int    used;
186   }
187   HashHW;
188
189static HashHW* newHHW ( void )
190{
191   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
192   h->size   = 8;
193   h->used   = 0;
194   h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
195   h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
196   h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
197   return h;
198}
199
200
201/* Look up key in the map. */
202
203static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
204{
205   Int i;
206   /* vex_printf("lookupHHW(%llx)\n", key ); */
207   for (i = 0; i < h->used; i++) {
208      if (h->inuse[i] && h->key[i] == key) {
209         if (val)
210            *val = h->val[i];
211         return True;
212      }
213   }
214   return False;
215}
216
217
218/* Add key->val to the map.  Replaces any existing binding for key. */
219
220static void addToHHW ( HashHW* h, HWord key, HWord val )
221{
222   Int i, j;
223   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
224
225   /* Find and replace existing binding, if any. */
226   for (i = 0; i < h->used; i++) {
227      if (h->inuse[i] && h->key[i] == key) {
228         h->val[i] = val;
229         return;
230      }
231   }
232
233   /* Ensure a space is available. */
234   if (h->used == h->size) {
235      /* Copy into arrays twice the size. */
236      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
237      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
238      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
239      for (i = j = 0; i < h->size; i++) {
240         if (!h->inuse[i]) continue;
241         inuse2[j] = True;
242         key2[j] = h->key[i];
243         val2[j] = h->val[i];
244         j++;
245      }
246      h->used = j;
247      h->size *= 2;
248      h->inuse = inuse2;
249      h->key = key2;
250      h->val = val2;
251   }
252
253   /* Finally, add it. */
254   vassert(h->used < h->size);
255   h->inuse[h->used] = True;
256   h->key[h->used] = key;
257   h->val[h->used] = val;
258   h->used++;
259}
260
261
262/*---------------------------------------------------------------*/
263/*--- Flattening out a BB into atomic SSA form                ---*/
264/*---------------------------------------------------------------*/
265
266/* Non-critical helper, heuristic for reducing the number of tmp-tmp
267   copies made by flattening.  If in doubt return False. */
268
269static Bool isFlat ( IRExpr* e )
270{
271   if (e->tag == Iex_Get)
272      return True;
273   if (e->tag == Iex_Binop)
274      return toBool( isIRAtom(e->Iex.Binop.arg1)
275                     && isIRAtom(e->Iex.Binop.arg2) );
276   if (e->tag == Iex_Load)
277      return isIRAtom(e->Iex.Load.addr);
278   return False;
279}
280
/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'.

   The general scheme for a compound expression is: recursively
   flatten each subexpression, append 't1 = <rebuilt expr>' to 'bb',
   and return RdTmp(t1).  Atoms (RdTmp and most constants) are
   returned unchanged.  Note this appends statements to 'bb' as a
   side effect; the recursion order determines the order of those
   appended temp assignments. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Only the index expression needs flattening; descr and
            bias are not expressions. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* A Get has no subexpressions, but it is not an atom, so it
            must still be lifted onto a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         /* Flatten all four operands. */
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Flatten each argument of the NULL-terminated arg vector.
            The vector is shallow-copied first so the original
            expression is not mutated. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already an atom; nothing to do. */
         return ex;

      default:
         /* Unknown expression tag: dump it and give up. */
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
396
397
/* Append a completely flattened form of 'st' to the end of 'bb'.

   Each case flattens the statement's subexpressions (via
   flatten_Expr, which appends the required temp assignments to 'bb')
   and then appends a rebuilt statement whose operands are all atoms.
   Where the statement is already flat, the original statement object
   is appended unchanged to avoid heap churn. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Both the index and the data may be compound. */
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         /* Guarded store: addr, data and guard all need flattening. */
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         /* Guarded load: addr, alternative value and guard. */
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* The Hi halves are only present for double-width CAS, so
            they may legitimately be NULL. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata is NULL for Load-Linked, non-NULL for
            Store-Conditional. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Copy the dirty call, then flatten mAddr (only present if
            the helper touches memory), the guard, and each ordinary
            argument.  VECRET/BBPTR marker args are not expressions
            and are left alone. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; pass through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         /* Only the guard is an expression; dst is a constant. */
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         /* Unknown statement tag: dump it and give up. */
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
519
520
521static IRSB* flatten_BB ( IRSB* in )
522{
523   Int   i;
524   IRSB* out;
525   out = emptyIRSB();
526   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
527   for (i = 0; i < in->stmts_used; i++)
528      if (in->stmts[i])
529         flatten_Stmt( out, in->stmts[i] );
530   out->next     = flatten_Expr( out, in->next );
531   out->jumpkind = in->jumpkind;
532   out->offsIP   = in->offsIP;
533   return out;
534}
535
536
537/*---------------------------------------------------------------*/
538/*--- In-place removal of redundant GETs                      ---*/
539/*---------------------------------------------------------------*/
540
541/* Scan forwards, building up an environment binding (min offset, max
542   offset) pairs to values, which will either be temps or constants.
543
544   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
545   env and if it matches, replace the Get with the stored value.  If
546   there is no match, add a (minoff,maxoff) :-> t binding.
547
548   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
549   any binding which fully or partially overlaps with (minoff,maxoff).
550   Then add a new (minoff,maxoff) :-> t or c binding.  */
551
552/* Extract the min/max offsets from a guest state array descriptor. */
553
554inline
555static void getArrayBounds ( IRRegArray* descr,
556                             UInt* minoff, UInt* maxoff )
557{
558   *minoff = descr->base;
559   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
560   vassert((*minoff & ~0xFFFF) == 0);
561   vassert((*maxoff & ~0xFFFF) == 0);
562   vassert(*minoff <= *maxoff);
563}
564
565/* Create keys, of the form ((minoffset << 16) | maxoffset). */
566
567static UInt mk_key_GetPut ( Int offset, IRType ty )
568{
569   /* offset should fit in 16 bits. */
570   UInt minoff = offset;
571   UInt maxoff = minoff + sizeofIRType(ty) - 1;
572   vassert((minoff & ~0xFFFF) == 0);
573   vassert((maxoff & ~0xFFFF) == 0);
574   return (minoff << 16) | maxoff;
575}
576
577static UInt mk_key_GetIPutI ( IRRegArray* descr )
578{
579   UInt minoff, maxoff;
580   getArrayBounds( descr, &minoff, &maxoff );
581   vassert((minoff & ~0xFFFF) == 0);
582   vassert((maxoff & ~0xFFFF) == 0);
583   return (minoff << 16) | maxoff;
584}
585
586/* Supposing h has keys of the form generated by mk_key_GetPut and
587   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
588   .. k_hi).
589*/
590static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
591{
592   Int  j;
593   UInt e_lo, e_hi;
594   vassert(k_lo <= k_hi);
595   /* invalidate any env entries which in any way overlap (k_lo
596      .. k_hi) */
597   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
598
599   for (j = 0; j < h->used; j++) {
600      if (!h->inuse[j])
601         continue;
602      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
603      e_hi = ((UInt)h->key[j]) & 0xFFFF;
604      vassert(e_lo <= e_hi);
605      if (e_hi < k_lo || k_hi < e_lo)
606         continue; /* no overlap possible */
607      else
608         /* overlap; invalidate */
609         h->inuse[j] = False;
610   }
611}
612
613
/* In-place removal of redundant Gets.  Scans forwards, maintaining
   'env': a map from (minoff,maxoff) guest-state ranges to the atom
   (temp or constant) last known to hold that range's value.  A Get
   whose range is in the map is replaced by the stored atom; Puts,
   PutIs, and state-writing dirty helpers invalidate entries. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               /* Rewrite the statement in place: t = <atom>. */
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            /* Note: 'key' is reused below when re-adding the Put's
               own binding to env. */
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            /* (this assignment is the body of the 'if' above,
               despite the indentation) */
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate.  'key' still holds
         the value computed in the Ist_Put branch above. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
706
707
708/*---------------------------------------------------------------*/
709/*--- In-place removal of redundant PUTs                      ---*/
710/*---------------------------------------------------------------*/
711
/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp.

   This is a helper for the backwards redundant-Put-removal scan:
   'env' holds guest-state ranges whose next event is known to be a
   write.  Anything that (possibly) reads guest state must remove the
   ranges it might read.  'preciseMemExnsFn' reports whether a given
   range must be kept up to date across memory accesses (for precise
   exceptions). */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;  /* does st access memory? */
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            /* A read of this range: its next event is no longer a
               write, so drop any overlapping entries. */
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (vex_control.iropt_register_updates) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi ))
                  env->inuse[j] = False;
            }
            break;
         default:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // Neither any rubbish other value.
            vassert(0);
      }
   } /* if (memRW) */

}
860
861
/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/

/* Remove redundant Puts from 'bb': a Put is redundant if a later Put
   to exactly the same guest-state slice happens before anything can
   observe the first one (a Get, a conditional exit, or an event with
   precise-exception requirements).  preciseMemExnsFn tells us which
   guest-state ranges must stay up to date across memory accesses. */
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   /* This transformation is invalid if every instruction needs
      up-to-date registers (VexRegUpdAllregsAtEachInsn). */
   vassert(vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn);

   /* env is the running set of guest-state slices whose next event
      (scanning backwards, so: later in execution order) is a write. */
   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         /* NOTE: only step (2) is currently enabled; steps (1) and (3)
            above are kept as commented-out code.  Emptying env is
            always the conservative, safe action. */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts.  Compute a (offset,size) key for the written
         slice; for PutI the key covers the whole described region. */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* Note: Ist_PutI is deliberately excluded here, so indexed puts
         are never removed; they fall through to the Get handling
         below, which treats them as (implicit) reads. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}
983
984
985/*---------------------------------------------------------------*/
986/*--- Constant propagation and folding                        ---*/
987/*---------------------------------------------------------------*/
988
#if STATS_IROPT
/* Counters for instrumenting sameIRExprs; only compiled in when
   STATS_IROPT is set. */
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation.
   Reset to zero at the start of each sameIRExprs call. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30
1016
1017
1018/* The env in this section is a map from IRTemp to IRExpr*,
1019   that is, an array indexed by IRTemp. */
1020
1021/* Do both expressions compute the same value? The answer is generally
1022   conservative, i.e. it will report that the expressions do not compute
1023   the same value when in fact they do. The reason is that we do not
   keep track of changes in the guest state and memory. Thus, two
1025   Get's, GetI's or Load's, even when accessing the same location, will be
1026   assumed to compute different values. After all the accesses may happen
1027   at different times and the guest state / memory can have changed in
1028   the meantime.
1029
1030   XXX IMPORTANT XXX the two expressions must have the same IR type.
1031   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
1032
1033/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
1034   wrapper that deals with the common tags-don't-match case, and a
1035   slower out of line general case.  Saves a few insns. */
1036
1037__attribute__((noinline))
1038static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
1039
1040inline
1041static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1042{
1043   if (e1->tag != e2->tag) return False;
1044   return sameIRExprs_aux2(env, e1, e2);
1045}
1046
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Out-of-line general case for sameIRExprs_aux.  Precondition
      (established by the wrapper): e1->tag == e2->tag.  The answer is
      conservative: False may be returned for expressions which are in
      fact equal, but True is only returned when equality is certain. */

   /* Give up once the node budget is exhausted, to bound the cost of
      comparing large trees. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         /* The same temp trivially denotes the same value. */
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Different temps: if both are bound in env, compare the
            expressions they are bound to. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         /* Equal iff same operator and (recursively) equal operands. */
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Both expressions must have the same type (see the contract
            of sameIRExprs), hence the constants must share a tag. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         /* Other constant kinds (floats, vectors): conservatively
            report not-equal. */
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
1124
1125inline
1126static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1127{
1128   Bool same;
1129
1130   num_nodes_visited = 0;
1131   same = sameIRExprs_aux(env, e1, e2);
1132
1133#if STATS_IROPT
1134   ++invocation_count;
1135   if (recursed) ++recursion_count;
1136   success_count += same;
1137   if (same && recursion_helped)
1138      ++recursion_success_count;
1139   if (num_nodes_visited > max_nodes_visited)
1140      max_nodes_visited = num_nodes_visited;
1141   recursed = False; /* reset */
1142   recursion_helped = False;  /* reset */
1143#endif /* STATS_IROPT */
1144
1145   return same;
1146}
1147
1148
1149/* Debugging-only hack (not used in production runs): make a guess
1150   whether sameIRExprs might assert due to the two args being of
1151   different types.  If in doubt return False.  Is only used when
1152   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1153   Bad because it duplicates functionality from typeOfIRExpr.  See
1154   comment on the single use point below for rationale. */
1155static
1156Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
1157{
1158   if (e1->tag != e2->tag) return False;
1159   switch (e1->tag) {
1160      case Iex_Const: {
1161         /* The only interesting case */
1162         IRConst *c1 = e1->Iex.Const.con;
1163         IRConst *c2 = e2->Iex.Const.con;
1164         return c1->tag != c2->tag;
1165      }
1166      default:
1167         break;
1168   }
1169   return False;
1170}
1171
1172
1173/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
1174static Bool isZeroU32 ( IRExpr* e )
1175{
1176   return toBool( e->tag == Iex_Const
1177                  && e->Iex.Const.con->tag == Ico_U32
1178                  && e->Iex.Const.con->Ico.U32 == 0);
1179}
1180
1181/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
1182static Bool isZeroU64 ( IRExpr* e )
1183{
1184   return toBool( e->tag == Iex_Const
1185                  && e->Iex.Const.con->tag == Ico_U64
1186                  && e->Iex.Const.con->Ico.U64 == 0);
1187}
1188
1189/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
1190static Bool isZeroV128 ( IRExpr* e )
1191{
1192   return toBool( e->tag == Iex_Const
1193                  && e->Iex.Const.con->tag == Ico_V128
1194                  && e->Iex.Const.con->Ico.V128 == 0x0000);
1195}
1196
1197/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
1198static Bool isZeroV256 ( IRExpr* e )
1199{
1200   return toBool( e->tag == Iex_Const
1201                  && e->Iex.Const.con->tag == Ico_V256
1202                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
1203}
1204
1205/* Is this an integer constant with value 0 ? */
1206static Bool isZeroU ( IRExpr* e )
1207{
1208   if (e->tag != Iex_Const) return False;
1209   switch (e->Iex.Const.con->tag) {
1210      case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
1211      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
1212      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
1213      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
1214      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
1215      default: vpanic("isZeroU");
1216   }
1217}
1218
1219/* Is this an integer constant with value 1---1b ? */
1220static Bool isOnesU ( IRExpr* e )
1221{
1222   if (e->tag != Iex_Const) return False;
1223   switch (e->Iex.Const.con->tag) {
1224      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
1225      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
1226      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
1227                                     == 0xFFFFFFFF);
1228      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
1229                                     == 0xFFFFFFFFFFFFFFFFULL);
1230      default: ppIRExpr(e); vpanic("isOnesU");
1231   }
1232}
1233
1234static Bool notBool ( Bool b )
1235{
1236   if (b == True) return False;
1237   if (b == False) return True;
1238   vpanic("notBool");
1239}
1240
1241/* Make a zero which has the same type as the result of the given
1242   primop. */
1243static IRExpr* mkZeroOfPrimopResultType ( IROp op )
1244{
1245   switch (op) {
1246      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
1247      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
1248      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
1249      case Iop_Sub32:
1250      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
1251      case Iop_And64:
1252      case Iop_Sub64:
1253      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
1254      case Iop_XorV128:
1255      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
1256      default: vpanic("mkZeroOfPrimopResultType: bad primop");
1257   }
1258}
1259
1260/* Make a value containing all 1-bits, which has the same type as the
1261   result of the given primop. */
1262static IRExpr* mkOnesOfPrimopResultType ( IROp op )
1263{
1264   switch (op) {
1265      case Iop_CmpEQ32:
1266      case Iop_CmpEQ64:
1267         return IRExpr_Const(IRConst_U1(toBool(1)));
1268      case Iop_Or8:
1269         return IRExpr_Const(IRConst_U8(0xFF));
1270      case Iop_Or16:
1271         return IRExpr_Const(IRConst_U16(0xFFFF));
1272      case Iop_Or32:
1273         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
1274      case Iop_CmpEQ8x8:
1275      case Iop_Or64:
1276         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
1277      case Iop_CmpEQ8x16:
1278      case Iop_CmpEQ16x8:
1279      case Iop_CmpEQ32x4:
1280         return IRExpr_Const(IRConst_V128(0xFFFF));
1281      default:
1282         ppIROp(op);
1283         vpanic("mkOnesOfPrimopResultType: bad primop");
1284   }
1285}
1286
1287/* Helpers for folding Clz32/64. */
1288static UInt fold_Clz64 ( ULong value )
1289{
1290   UInt i;
1291   vassert(value != 0ULL); /* no defined semantics for arg==0 */
1292   for (i = 0; i < 64; ++i) {
1293      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
1294   }
1295   vassert(0);
1296   /*NOTREACHED*/
1297   return 0;
1298}
1299
1300static UInt fold_Clz32 ( UInt value )
1301{
1302   UInt i;
1303   vassert(value != 0); /* no defined semantics for arg==0 */
1304   for (i = 0; i < 32; ++i) {
1305      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
1306   }
1307   vassert(0);
1308   /*NOTREACHED*/
1309   return 0;
1310}
1311
1312/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
1313   the corresponding real constant. */
1314//XXX re-check this before use
1315//static ULong de_summarise_V64 ( UChar v64 )
1316//{
1317//   ULong r = 0;
1318//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1319//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1320//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1321//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1322//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1323//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1324//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1325//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1326//   return r;
1327//}
1328
1329/* Helper for arbitrary expression pattern matching in flat IR.  If
1330   'e' is a reference to a tmp, look it up in env -- repeatedly, if
1331   necessary -- until it resolves to a non-tmp.  Note that this can
1332   return NULL if it can't resolve 'e' to a new expression, which will
1333   be the case if 'e' is instead defined by an IRStmt (IRDirty or
1334   LLSC). */
1335static IRExpr* chase ( IRExpr** env, IRExpr* e )
1336{
1337   /* Why is this loop guaranteed to terminate?  Because all tmps must
1338      have definitions before use, hence a tmp cannot be bound
1339      (directly or indirectly) to itself. */
1340   while (e->tag == Iex_RdTmp) {
1341      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
1342      e = env[(Int)e->Iex.RdTmp.tmp];
1343      if (e == NULL) break;
1344   }
1345   return e;
1346}
1347
1348static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
1349{
1350   Int     shift;
1351   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1352
1353   switch (e->tag) {
1354   case Iex_Unop:
1355      /* UNARY ops */
1356      if (e->Iex.Unop.arg->tag == Iex_Const) {
1357         switch (e->Iex.Unop.op) {
1358         case Iop_1Uto8:
1359            e2 = IRExpr_Const(IRConst_U8(toUChar(
1360                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1361                    ? 1 : 0)));
1362            break;
1363         case Iop_1Uto32:
1364            e2 = IRExpr_Const(IRConst_U32(
1365                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1366                    ? 1 : 0));
1367            break;
1368         case Iop_1Uto64:
1369            e2 = IRExpr_Const(IRConst_U64(
1370                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1371                    ? 1 : 0));
1372            break;
1373
1374         case Iop_1Sto8:
1375            e2 = IRExpr_Const(IRConst_U8(toUChar(
1376                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1377                    ? 0xFF : 0)));
1378            break;
1379         case Iop_1Sto16:
1380            e2 = IRExpr_Const(IRConst_U16(toUShort(
1381                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1382                    ? 0xFFFF : 0)));
1383            break;
1384         case Iop_1Sto32:
1385            e2 = IRExpr_Const(IRConst_U32(
1386                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1387                    ? 0xFFFFFFFF : 0));
1388            break;
1389         case Iop_1Sto64:
1390            e2 = IRExpr_Const(IRConst_U64(
1391                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1392                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
1393            break;
1394
1395         case Iop_8Sto32: {
1396            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1397            s32 <<= 24;
1398            s32 >>= 24;
1399            e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1400            break;
1401         }
1402         case Iop_16Sto32: {
1403            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1404            s32 <<= 16;
1405            s32 >>= 16;
1406            e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
1407            break;
1408         }
1409         case Iop_8Uto64:
1410            e2 = IRExpr_Const(IRConst_U64(
1411                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1412            break;
1413         case Iop_16Uto64:
1414            e2 = IRExpr_Const(IRConst_U64(
1415                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1416            break;
1417         case Iop_8Uto32:
1418            e2 = IRExpr_Const(IRConst_U32(
1419                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1420            break;
1421         case Iop_8Sto16: {
1422            /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1423            s16 <<= 8;
1424            s16 >>= 8;
1425            e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
1426            break;
1427         }
1428         case Iop_8Uto16:
1429            e2 = IRExpr_Const(IRConst_U16(
1430                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1431            break;
1432         case Iop_16Uto32:
1433            e2 = IRExpr_Const(IRConst_U32(
1434                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1435            break;
1436         case Iop_32to16:
1437            e2 = IRExpr_Const(IRConst_U16(toUShort(
1438                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1439            break;
1440         case Iop_32to8:
1441            e2 = IRExpr_Const(IRConst_U8(toUChar(
1442                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1443            break;
1444         case Iop_32to1:
1445            e2 = IRExpr_Const(IRConst_U1(toBool(
1446                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1447                 )));
1448            break;
1449         case Iop_64to1:
1450            e2 = IRExpr_Const(IRConst_U1(toBool(
1451                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1452                 )));
1453            break;
1454
1455         case Iop_NotV128:
1456            e2 = IRExpr_Const(IRConst_V128(
1457                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
1458            break;
1459         case Iop_Not64:
1460            e2 = IRExpr_Const(IRConst_U64(
1461                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1462            break;
1463         case Iop_Not32:
1464            e2 = IRExpr_Const(IRConst_U32(
1465                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1466            break;
1467         case Iop_Not16:
1468            e2 = IRExpr_Const(IRConst_U16(toUShort(
1469                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1470            break;
1471         case Iop_Not8:
1472            e2 = IRExpr_Const(IRConst_U8(toUChar(
1473                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1474            break;
1475
1476         case Iop_Not1:
1477            e2 = IRExpr_Const(IRConst_U1(
1478                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1479            break;
1480
1481         case Iop_64to8: {
1482            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1483            w64 &= 0xFFULL;
1484            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1485            break;
1486         }
1487         case Iop_64to16: {
1488            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1489            w64 &= 0xFFFFULL;
1490            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1491            break;
1492         }
1493         case Iop_64to32: {
1494            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1495            w64 &= 0x00000000FFFFFFFFULL;
1496            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1497            break;
1498         }
1499         case Iop_64HIto32: {
1500            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1501            w64 >>= 32;
1502            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1503            break;
1504         }
1505         case Iop_32Uto64:
1506            e2 = IRExpr_Const(IRConst_U64(
1507                    0xFFFFFFFFULL
1508                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1509            break;
1510         case Iop_16Sto64: {
1511            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1512            s64 <<= 48;
1513            s64 >>= 48;
1514            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1515            break;
1516         }
1517         case Iop_32Sto64: {
1518            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1519            s64 <<= 32;
1520            s64 >>= 32;
1521            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1522            break;
1523         }
1524
1525         case Iop_16to8: {
1526            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1527            w16 &= 0xFF;
1528            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1529            break;
1530         }
1531         case Iop_16HIto8: {
1532            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1533            w16 >>= 8;
1534            w16 &= 0xFF;
1535            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1536            break;
1537         }
1538
1539         case Iop_CmpNEZ8:
1540            e2 = IRExpr_Const(IRConst_U1(toBool(
1541                    0 !=
1542                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1543                 )));
1544            break;
1545         case Iop_CmpNEZ32:
1546            e2 = IRExpr_Const(IRConst_U1(toBool(
1547                    0 !=
1548                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1549                 )));
1550            break;
1551         case Iop_CmpNEZ64:
1552            e2 = IRExpr_Const(IRConst_U1(toBool(
1553                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1554                 )));
1555            break;
1556
1557         case Iop_CmpwNEZ32: {
1558            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1559            if (w32 == 0)
1560               e2 = IRExpr_Const(IRConst_U32( 0 ));
1561            else
1562               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1563            break;
1564         }
1565         case Iop_CmpwNEZ64: {
1566            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1567            if (w64 == 0)
1568               e2 = IRExpr_Const(IRConst_U64( 0 ));
1569            else
1570               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1571            break;
1572         }
1573
1574         case Iop_Left32: {
1575            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1576            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
1577            s32 = (s32 | (-s32));
1578            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1579            break;
1580         }
1581
1582         case Iop_Left64: {
1583            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1584            Long  s64 = (Long)u64;
1585            s64 = (s64 | (-s64));
1586            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1587            break;
1588         }
1589
1590         case Iop_Clz32: {
1591            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1592            if (u32 != 0)
1593               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1594            break;
1595         }
1596         case Iop_Clz64: {
1597            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1598            if (u64 != 0ULL)
1599               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1600            break;
1601         }
1602
1603         /* For these vector ones, can't fold all cases, but at least
1604            do the most obvious one.  Could do better here using
1605            summarise/desummarise of vector constants, but too
1606            difficult to verify; hence just handle the zero cases. */
1607         case Iop_32UtoV128: {
1608            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1609            if (0 == u32) {
1610               e2 = IRExpr_Const(IRConst_V128(0x0000));
1611            } else {
1612               goto unhandled;
1613            }
1614            break;
1615         }
1616         case Iop_V128to64: {
1617            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1618            if (0 == ((v128 >> 0) & 0xFF)) {
1619               e2 = IRExpr_Const(IRConst_U64(0));
1620            } else {
1621               goto unhandled;
1622            }
1623            break;
1624         }
1625         case Iop_V128HIto64: {
1626            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1627            if (0 == ((v128 >> 8) & 0xFF)) {
1628               e2 = IRExpr_Const(IRConst_U64(0));
1629            } else {
1630               goto unhandled;
1631            }
1632            break;
1633         }
1634         case Iop_64UtoV128: {
1635            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1636            if (0 == u64) {
1637               e2 = IRExpr_Const(IRConst_V128(0x0000));
1638            } else {
1639               goto unhandled;
1640            }
1641            break;
1642         }
1643
1644         /* Even stupider (although still correct ..) */
1645         case Iop_V256to64_0: case Iop_V256to64_1:
1646         case Iop_V256to64_2: case Iop_V256to64_3: {
1647            UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1648            if (v256 == 0x00000000) {
1649               e2 = IRExpr_Const(IRConst_U64(0));
1650            } else {
1651               goto unhandled;
1652            }
1653            break;
1654         }
1655
1656         default:
1657            goto unhandled;
1658      }
1659      }
1660      break;
1661
1662   case Iex_Binop:
1663      /* BINARY ops */
1664      if (e->Iex.Binop.arg1->tag == Iex_Const
1665          && e->Iex.Binop.arg2->tag == Iex_Const) {
1666         /* cases where both args are consts */
1667         switch (e->Iex.Binop.op) {
1668
1669            /* -- Or -- */
1670            case Iop_Or8:
1671               e2 = IRExpr_Const(IRConst_U8(toUChar(
1672                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1673                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1674               break;
1675            case Iop_Or16:
1676               e2 = IRExpr_Const(IRConst_U16(toUShort(
1677                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1678                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1679               break;
1680            case Iop_Or32:
1681               e2 = IRExpr_Const(IRConst_U32(
1682                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1683                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1684               break;
1685            case Iop_Or64:
1686               e2 = IRExpr_Const(IRConst_U64(
1687                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1688                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1689               break;
1690            case Iop_OrV128:
1691               e2 = IRExpr_Const(IRConst_V128(
1692                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1693                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1694               break;
1695
1696            /* -- Xor -- */
1697            case Iop_Xor8:
1698               e2 = IRExpr_Const(IRConst_U8(toUChar(
1699                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1700                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1701               break;
1702            case Iop_Xor16:
1703               e2 = IRExpr_Const(IRConst_U16(toUShort(
1704                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1705                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1706               break;
1707            case Iop_Xor32:
1708               e2 = IRExpr_Const(IRConst_U32(
1709                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1710                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1711               break;
1712            case Iop_Xor64:
1713               e2 = IRExpr_Const(IRConst_U64(
1714                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1715                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1716               break;
1717            case Iop_XorV128:
1718               e2 = IRExpr_Const(IRConst_V128(
1719                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1720                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1721               break;
1722
1723            /* -- And -- */
1724            case Iop_And8:
1725               e2 = IRExpr_Const(IRConst_U8(toUChar(
1726                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1727                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1728               break;
1729            case Iop_And16:
1730               e2 = IRExpr_Const(IRConst_U16(toUShort(
1731                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1732                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1733               break;
1734            case Iop_And32:
1735               e2 = IRExpr_Const(IRConst_U32(
1736                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1737                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1738               break;
1739            case Iop_And64:
1740               e2 = IRExpr_Const(IRConst_U64(
1741                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1742                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1743               break;
1744            case Iop_AndV128:
1745               e2 = IRExpr_Const(IRConst_V128(
1746                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1747                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1748               break;
1749
1750            /* -- Add -- */
1751            case Iop_Add8:
1752               e2 = IRExpr_Const(IRConst_U8(toUChar(
1753                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1754                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1755               break;
1756            case Iop_Add32:
1757               e2 = IRExpr_Const(IRConst_U32(
1758                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1759                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1760               break;
1761            case Iop_Add64:
1762               e2 = IRExpr_Const(IRConst_U64(
1763                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1764                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1765               break;
1766
1767            /* -- Sub -- */
1768            case Iop_Sub8:
1769               e2 = IRExpr_Const(IRConst_U8(toUChar(
1770                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1771                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1772               break;
1773            case Iop_Sub32:
1774               e2 = IRExpr_Const(IRConst_U32(
1775                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1776                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1777               break;
1778            case Iop_Sub64:
1779               e2 = IRExpr_Const(IRConst_U64(
1780                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1781                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1782               break;
1783
1784            /* -- Max32U -- */
1785            case Iop_Max32U: {
1786               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1787               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1788               UInt res  = u32a > u32b ? u32a : u32b;
1789               e2 = IRExpr_Const(IRConst_U32(res));
1790               break;
1791            }
1792
1793            /* -- Mul -- */
1794            case Iop_Mul32:
1795               e2 = IRExpr_Const(IRConst_U32(
1796                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1797                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1798               break;
1799            case Iop_Mul64:
1800               e2 = IRExpr_Const(IRConst_U64(
1801                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1802                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1803               break;
1804
1805            case Iop_MullS32: {
1806               /* very paranoid */
1807               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1808               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1809               Int   s32a = (Int)u32a;
1810               Int   s32b = (Int)u32b;
1811               Long  s64a = (Long)s32a;
1812               Long  s64b = (Long)s32b;
1813               Long  sres = s64a * s64b;
1814               ULong ures = (ULong)sres;
1815               e2 = IRExpr_Const(IRConst_U64(ures));
1816               break;
1817            }
1818
1819            /* -- Shl -- */
1820            case Iop_Shl32:
1821               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1822               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1823               if (shift >= 0 && shift <= 31)
1824                  e2 = IRExpr_Const(IRConst_U32(
1825                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1826                           << shift)));
1827               break;
1828            case Iop_Shl64:
1829               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1830               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1831               if (shift >= 0 && shift <= 63)
1832                  e2 = IRExpr_Const(IRConst_U64(
1833                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1834                           << shift)));
1835               break;
1836
1837            /* -- Sar -- */
1838            case Iop_Sar32: {
1839               /* paranoid ... */
1840               /*signed*/ Int s32;
1841               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1842               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1843               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1844               if (shift >= 0 && shift <= 31) {
1845                  s32 >>=/*signed*/ shift;
1846                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1847               }
1848               break;
1849            }
1850            case Iop_Sar64: {
1851               /* paranoid ... */
1852               /*signed*/ Long s64;
1853               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1854               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1855               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1856               if (shift >= 0 && shift <= 63) {
1857                  s64 >>=/*signed*/ shift;
1858                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1859               }
1860               break;
1861            }
1862
1863            /* -- Shr -- */
1864            case Iop_Shr32: {
1865               /* paranoid ... */
1866               /*unsigned*/ UInt u32;
1867               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1868               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1869               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1870               if (shift >= 0 && shift <= 31) {
1871                  u32 >>=/*unsigned*/ shift;
1872                  e2 = IRExpr_Const(IRConst_U32(u32));
1873               }
1874               break;
1875            }
1876            case Iop_Shr64: {
1877               /* paranoid ... */
1878               /*unsigned*/ ULong u64;
1879               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1880               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1881               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1882               if (shift >= 0 && shift <= 63) {
1883                  u64 >>=/*unsigned*/ shift;
1884                  e2 = IRExpr_Const(IRConst_U64(u64));
1885               }
1886               break;
1887            }
1888
1889            /* -- CmpEQ -- */
1890            case Iop_CmpEQ32:
1891               e2 = IRExpr_Const(IRConst_U1(toBool(
1892                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1893                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1894               break;
1895            case Iop_CmpEQ64:
1896               e2 = IRExpr_Const(IRConst_U1(toBool(
1897                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1898                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1899               break;
1900
1901            /* -- CmpNE -- */
1902            case Iop_CmpNE8:
1903            case Iop_CasCmpNE8:
1904            case Iop_ExpCmpNE8:
1905               e2 = IRExpr_Const(IRConst_U1(toBool(
1906                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1907                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1908               break;
1909            case Iop_CmpNE32:
1910            case Iop_CasCmpNE32:
1911            case Iop_ExpCmpNE32:
1912               e2 = IRExpr_Const(IRConst_U1(toBool(
1913                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1914                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1915               break;
1916            case Iop_CmpNE64:
1917            case Iop_CasCmpNE64:
1918            case Iop_ExpCmpNE64:
1919               e2 = IRExpr_Const(IRConst_U1(toBool(
1920                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1921                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1922               break;
1923
1924            /* -- CmpLEU -- */
1925            case Iop_CmpLE32U:
1926               e2 = IRExpr_Const(IRConst_U1(toBool(
1927                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1928                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1929               break;
1930            case Iop_CmpLE64U:
1931               e2 = IRExpr_Const(IRConst_U1(toBool(
1932                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1933                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1934               break;
1935
1936            /* -- CmpLES -- */
1937            case Iop_CmpLE32S:
1938               e2 = IRExpr_Const(IRConst_U1(toBool(
1939                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1940                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1941               break;
1942            case Iop_CmpLE64S:
1943               e2 = IRExpr_Const(IRConst_U1(toBool(
1944                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1945                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1946               break;
1947
1948            /* -- CmpLTS -- */
1949            case Iop_CmpLT32S:
1950               e2 = IRExpr_Const(IRConst_U1(toBool(
1951                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1952                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1953               break;
1954            case Iop_CmpLT64S:
1955               e2 = IRExpr_Const(IRConst_U1(toBool(
1956                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1957                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1958               break;
1959
1960            /* -- CmpLTU -- */
1961            case Iop_CmpLT32U:
1962               e2 = IRExpr_Const(IRConst_U1(toBool(
1963                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1964                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1965               break;
1966            case Iop_CmpLT64U:
1967               e2 = IRExpr_Const(IRConst_U1(toBool(
1968                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1969                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1970               break;
1971
1972            /* -- CmpORD -- */
1973            case Iop_CmpORD32S: {
1974               /* very paranoid */
1975               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1976               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1977               Int   s32a = (Int)u32a;
1978               Int   s32b = (Int)u32b;
1979               Int   r = 0x2; /* EQ */
1980               if (s32a < s32b) {
1981                  r = 0x8; /* LT */
1982               }
1983               else if (s32a > s32b) {
1984                  r = 0x4; /* GT */
1985               }
1986               e2 = IRExpr_Const(IRConst_U32(r));
1987               break;
1988            }
1989
1990            /* -- nHLto2n -- */
1991            case Iop_32HLto64:
1992               e2 = IRExpr_Const(IRConst_U64(
1993                       (((ULong)(e->Iex.Binop.arg1
1994                                  ->Iex.Const.con->Ico.U32)) << 32)
1995                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
1996                    ));
1997               break;
1998            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
2003               break;
2004            /* For this vector one, can't fold all cases, but at
2005               least do the most obvious one.  Could do better here
2006               using summarise/desummarise of vector constants, but
2007               too difficult to verify; hence just handle the zero
2008               cases. */
2009            case Iop_64HLtoV128: {
2010               ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2011               ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2012               if (0 == argHi && 0 == argLo) {
2013                  e2 = IRExpr_Const(IRConst_V128(0));
2014               } else {
2015                  goto unhandled;
2016               }
2017               break;
2018            }
2019            /* Same reasoning for the 256-bit version. */
2020            case Iop_V128HLtoV256: {
2021               IRExpr* argHi = e->Iex.Binop.arg1;
2022               IRExpr* argLo = e->Iex.Binop.arg2;
2023               if (isZeroV128(argHi) && isZeroV128(argLo)) {
2024                  e2 = IRExpr_Const(IRConst_V256(0));
2025               } else {
2026                  goto unhandled;
2027               }
2028               break;
2029            }
2030
2031            /* -- V128 stuff -- */
2032            case Iop_InterleaveLO8x16: {
2033               /* This turns up a lot in Memcheck instrumentation of
2034                  Icc generated code.  I don't know why. */
2035               UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2036               UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2037               if (0 == arg1 && 0 == arg2) {
2038                  e2 = IRExpr_Const(IRConst_V128(0));
2039               } else {
2040                  goto unhandled;
2041               }
2042               break;
2043            }
2044
2045            default:
2046               goto unhandled;
2047         }
2048
2049      } else {
2050
2051         /* other cases (identities, etc) */
2052         switch (e->Iex.Binop.op) {
2053
2054            case Iop_Shl32:
2055            case Iop_Shl64:
2056            case Iop_Shr64:
2057               /* Shl32/Shl64/Shr64(x,0) ==> x */
2058               if (isZeroU(e->Iex.Binop.arg2)) {
2059                  e2 = e->Iex.Binop.arg1;
2060                  break;
2061               }
2062               /* Shl32/Shl64/Shr64(0,x) ==> 0 */
2063               if (isZeroU(e->Iex.Binop.arg1)) {
2064                  e2 = e->Iex.Binop.arg1;
2065                  break;
2066               }
2067               break;
2068
2069            case Iop_Shr32:
2070               /* Shr32(x,0) ==> x */
2071               if (isZeroU(e->Iex.Binop.arg2)) {
2072                  e2 = e->Iex.Binop.arg1;
2073                  break;
2074               }
2075               break;
2076
2077            case Iop_Or8:
2078            case Iop_Or16:
2079            case Iop_Or32:
2080            case Iop_Or64:
2081            case Iop_Max32U:
2082               /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2083               if (isZeroU(e->Iex.Binop.arg2)) {
2084                  e2 = e->Iex.Binop.arg1;
2085                  break;
2086               }
2087               /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2088               if (isZeroU(e->Iex.Binop.arg1)) {
2089                  e2 = e->Iex.Binop.arg2;
2090                  break;
2091               }
2092               /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2093               /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2094               if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2095                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2096                  break;
2097               }
2098               /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2099               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2100                  e2 = e->Iex.Binop.arg1;
2101                  break;
2102               }
2103               break;
2104
2105            case Iop_Add8:
2106               /* Add8(t,t) ==> t << 1.
2107                  Memcheck doesn't understand that
2108                  x+x produces a defined least significant bit, and it seems
2109                  simplest just to get rid of the problem by rewriting it
2110                  out, since the opportunity to do so exists. */
2111               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2112                  e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2113                                    IRExpr_Const(IRConst_U8(1)));
2114                  break;
2115               }
2116               break;
2117
2118               /* NB no Add16(t,t) case yet as no known test case exists */
2119
2120            case Iop_Add32:
2121            case Iop_Add64:
2122               /* Add32/Add64(x,0) ==> x */
2123               if (isZeroU(e->Iex.Binop.arg2)) {
2124                  e2 = e->Iex.Binop.arg1;
2125                  break;
2126               }
2127               /* Add32/Add64(0,x) ==> x */
2128               if (isZeroU(e->Iex.Binop.arg1)) {
2129                  e2 = e->Iex.Binop.arg2;
2130                  break;
2131               }
2132               /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2133               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2134                  e2 = IRExpr_Binop(
2135                          e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2136                          e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2137                  break;
2138               }
2139               break;
2140
2141            case Iop_Sub32:
2142            case Iop_Sub64:
2143               /* Sub32/Sub64(x,0) ==> x */
2144               if (isZeroU(e->Iex.Binop.arg2)) {
2145                  e2 = e->Iex.Binop.arg1;
2146                  break;
2147               }
2148               /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2149               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2150                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2151                  break;
2152               }
2153               break;
2154            case Iop_Sub8x16:
2155               /* Sub8x16(x,0) ==> x */
2156               if (isZeroV128(e->Iex.Binop.arg2)) {
2157                  e2 = e->Iex.Binop.arg1;
2158                  break;
2159               }
2160               break;
2161
2162            case Iop_And64:
2163            case Iop_And32:
2164               /* And32/And64(x,1---1b) ==> x */
2165               if (isOnesU(e->Iex.Binop.arg2)) {
2166                  e2 = e->Iex.Binop.arg1;
2167                  break;
2168               }
2169               /* And32/And64(x,0) ==> 0 */
2170               if (isZeroU(e->Iex.Binop.arg2)) {
2171                  e2 = e->Iex.Binop.arg2;
2172                  break;
2173               }
2174               /* And32/And64(0,x) ==> 0 */
2175               if (isZeroU(e->Iex.Binop.arg1)) {
2176                  e2 = e->Iex.Binop.arg1;
2177                  break;
2178               }
2179               /* And32/And64(t,t) ==> t, for some IRTemp t */
2180               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2181                  e2 = e->Iex.Binop.arg1;
2182                  break;
2183               }
2184               break;
2185
2186            case Iop_And8:
2187            case Iop_And16:
2188            case Iop_AndV128:
2189            case Iop_AndV256:
2190               /* And8/And16/AndV128/AndV256(t,t)
2191                  ==> t, for some IRTemp t */
2192               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2193                  e2 = e->Iex.Binop.arg1;
2194                  break;
2195               }
2196               /* Deal with either arg zero.  Could handle other And
2197                  cases here too. */
2198               if (e->Iex.Binop.op == Iop_And64
2199                   && (isZeroU64(e->Iex.Binop.arg1)
2200                       || isZeroU64(e->Iex.Binop.arg2))) {
2201                  e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2202                  break;
2203               } else if (e->Iex.Binop.op == Iop_AndV128
2204                          && (isZeroV128(e->Iex.Binop.arg1)
2205                              || isZeroV128(e->Iex.Binop.arg2))) {
2206                  e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2207                  break;
2208               }
2209               break;
2210
2211            case Iop_OrV128:
2212            case Iop_OrV256:
               /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2214               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2215                  e2 = e->Iex.Binop.arg1;
2216                  break;
2217               }
2218               /* OrV128(t,0) ==> t */
2219               if (e->Iex.Binop.op == Iop_OrV128) {
2220                  if (isZeroV128(e->Iex.Binop.arg2)) {
2221                     e2 = e->Iex.Binop.arg1;
2222                     break;
2223                  }
2224                  if (isZeroV128(e->Iex.Binop.arg1)) {
2225                     e2 = e->Iex.Binop.arg2;
2226                     break;
2227                  }
2228               }
2229               /* OrV256(t,0) ==> t */
2230               if (e->Iex.Binop.op == Iop_OrV256) {
2231                  if (isZeroV256(e->Iex.Binop.arg2)) {
2232                     e2 = e->Iex.Binop.arg1;
2233                     break;
2234                  }
2235                  //Disabled because there's no known test case right now.
2236                  //if (isZeroV256(e->Iex.Binop.arg1)) {
2237                  //   e2 = e->Iex.Binop.arg2;
2238                  //   break;
2239                  //}
2240               }
2241               break;
2242
2243            case Iop_Xor8:
2244            case Iop_Xor16:
2245            case Iop_Xor32:
2246            case Iop_Xor64:
2247            case Iop_XorV128:
2248               /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2249               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2250                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2251                  break;
2252               }
2253               break;
2254
2255            case Iop_CmpNE32:
2256               /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2257               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2258                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2259                  break;
2260               }
2261               /* CmpNE32(1Uto32(b), 0) ==> b */
2262               if (isZeroU32(e->Iex.Binop.arg2)) {
2263                  IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2264                  if (a1 && a1->tag == Iex_Unop
2265                         && a1->Iex.Unop.op == Iop_1Uto32) {
2266                     e2 = a1->Iex.Unop.arg;
2267                     break;
2268                  }
2269               }
2270               break;
2271
2272            case Iop_CmpEQ32:
2273            case Iop_CmpEQ64:
2274            case Iop_CmpEQ8x8:
2275            case Iop_CmpEQ8x16:
2276            case Iop_CmpEQ16x8:
2277            case Iop_CmpEQ32x4:
2278               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2279                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2280                  break;
2281               }
2282               break;
2283
2284            default:
2285               break;
2286         }
2287      }
2288      break;
2289
2290   case Iex_ITE:
2291      /* ITE */
      /* is the discriminant a constant? */
2293      if (e->Iex.ITE.cond->tag == Iex_Const) {
2294         /* assured us by the IR type rules */
2295         vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2296         e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2297                 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2298      }
2299      else
2300      /* are the arms identical? (pretty weedy test) */
2301      if (sameIRExprs(env, e->Iex.ITE.iftrue,
2302                           e->Iex.ITE.iffalse)) {
2303         e2 = e->Iex.ITE.iffalse;
2304      }
2305      break;
2306
2307   default:
2308      /* not considered */
2309      break;
2310   }
2311
2312   /* Show cases where we've found but not folded 'op(t,t)'.  Be
2313      careful not to call sameIRExprs with values of different types,
2314      though, else it will assert (and so it should!).  We can't
2315      conveniently call typeOfIRExpr on the two args without a whole
2316      bunch of extra plumbing to pass in a type env, so just use a
      hacky test to check the arguments are not anything that might
      cause sameIRExprs to assert.  This is only OK because this kludge is
2319      only used for debug printing, not for "real" operation.  For
2320      "real" operation (ie, all other calls to sameIRExprs), it is
      essential that the two args have the same type.
2322
2323      The "right" solution is to plumb the containing block's
2324      IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
2325      that's a bunch of extra parameter passing which will just slow
2326      down the normal case, for no purpose. */
2327   if (vex_control.iropt_verbosity > 0
2328       && e == e2
2329       && e->tag == Iex_Binop
2330       && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2331                                                    e->Iex.Binop.arg2)
2332       && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2333      vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2334      ppIRExpr(e);
2335      vex_printf("\n");
2336   }
2337
2338   /* Show the overall results of folding. */
2339   if (DEBUG_IROPT && e2 != e) {
2340      vex_printf("FOLD: ");
2341      ppIRExpr(e); vex_printf("  ->  ");
2342      ppIRExpr(e2); vex_printf("\n");
2343   }
2344
2345   return e2;
2346
2347 unhandled:
2348#  if 0
2349   vex_printf("\n\n");
2350   ppIRExpr(e);
2351   vpanic("fold_Expr: no rule for the above");
2352#  else
2353   if (vex_control.iropt_verbosity > 0) {
2354      vex_printf("vex iropt: fold_Expr: no const rule for: ");
2355      ppIRExpr(e);
2356      vex_printf("\n");
2357   }
2358   return e2;
2359#  endif
2360}
2361
2362
2363/* Apply the subst to a simple 1-level expression -- guaranteed to be
2364   1-level due to previous flattening pass. */
2365
2366static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2367{
2368   switch (ex->tag) {
2369      case Iex_RdTmp:
2370         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
2371            IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
2372            if (rhs->tag == Iex_RdTmp)
2373               return rhs;
2374            if (rhs->tag == Iex_Const
2375                && rhs->Iex.Const.con->tag != Ico_F64i)
2376               return rhs;
2377         }
2378         /* not bound in env */
2379         return ex;
2380
2381      case Iex_Const:
2382      case Iex_Get:
2383         return ex;
2384
2385      case Iex_GetI:
2386         vassert(isIRAtom(ex->Iex.GetI.ix));
2387         return IRExpr_GetI(
2388            ex->Iex.GetI.descr,
2389            subst_Expr(env, ex->Iex.GetI.ix),
2390            ex->Iex.GetI.bias
2391         );
2392
2393      case Iex_Qop: {
2394         IRQop* qop = ex->Iex.Qop.details;
2395         vassert(isIRAtom(qop->arg1));
2396         vassert(isIRAtom(qop->arg2));
2397         vassert(isIRAtom(qop->arg3));
2398         vassert(isIRAtom(qop->arg4));
2399         return IRExpr_Qop(
2400                   qop->op,
2401                   subst_Expr(env, qop->arg1),
2402                   subst_Expr(env, qop->arg2),
2403                   subst_Expr(env, qop->arg3),
2404                   subst_Expr(env, qop->arg4)
2405                );
2406      }
2407
2408      case Iex_Triop: {
2409         IRTriop* triop = ex->Iex.Triop.details;
2410         vassert(isIRAtom(triop->arg1));
2411         vassert(isIRAtom(triop->arg2));
2412         vassert(isIRAtom(triop->arg3));
2413         return IRExpr_Triop(
2414                   triop->op,
2415                   subst_Expr(env, triop->arg1),
2416                   subst_Expr(env, triop->arg2),
2417                   subst_Expr(env, triop->arg3)
2418                );
2419      }
2420
2421      case Iex_Binop:
2422         vassert(isIRAtom(ex->Iex.Binop.arg1));
2423         vassert(isIRAtom(ex->Iex.Binop.arg2));
2424         return IRExpr_Binop(
2425                   ex->Iex.Binop.op,
2426                   subst_Expr(env, ex->Iex.Binop.arg1),
2427                   subst_Expr(env, ex->Iex.Binop.arg2)
2428                );
2429
2430      case Iex_Unop:
2431         vassert(isIRAtom(ex->Iex.Unop.arg));
2432         return IRExpr_Unop(
2433                   ex->Iex.Unop.op,
2434                   subst_Expr(env, ex->Iex.Unop.arg)
2435                );
2436
2437      case Iex_Load:
2438         vassert(isIRAtom(ex->Iex.Load.addr));
2439         return IRExpr_Load(
2440                   ex->Iex.Load.end,
2441                   ex->Iex.Load.ty,
2442                   subst_Expr(env, ex->Iex.Load.addr)
2443                );
2444
2445      case Iex_CCall: {
2446         Int      i;
2447         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
2448         for (i = 0; args2[i]; i++) {
2449            vassert(isIRAtom(args2[i]));
2450            args2[i] = subst_Expr(env, args2[i]);
2451         }
2452         return IRExpr_CCall(
2453                   ex->Iex.CCall.cee,
2454                   ex->Iex.CCall.retty,
2455                   args2
2456                );
2457      }
2458
2459      case Iex_ITE:
2460         vassert(isIRAtom(ex->Iex.ITE.cond));
2461         vassert(isIRAtom(ex->Iex.ITE.iftrue));
2462         vassert(isIRAtom(ex->Iex.ITE.iffalse));
2463         return IRExpr_ITE(
2464                   subst_Expr(env, ex->Iex.ITE.cond),
2465                   subst_Expr(env, ex->Iex.ITE.iftrue),
2466                   subst_Expr(env, ex->Iex.ITE.iffalse)
2467                );
2468
2469      default:
2470         vex_printf("\n\n"); ppIRExpr(ex);
2471         vpanic("subst_Expr");
2472
2473   }
2474}
2475
2476
2477/* Apply the subst to stmt, then fold the result as much as possible.
2478   Much simplified due to stmt being previously flattened.  As a
2479   result of this, the stmt may wind up being turned into a no-op.
2480*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   /* General pattern for each case: assert the arguments are atoms
      (the BB was flattened before this pass), apply the tmp->expr
      substitution |env| to each of them, and constant-fold the
      results.  A few cases (StoreG, LoadG, Exit) additionally
      inspect the folded guard and may simplify the whole statement. */
   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant.
               False => drop the store entirely; True => demote the
               guarded store to a plain unconditional store. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         /* The Hi halves are only present for double-width CAS. */
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         /* storedata == NULL signifies a Load-Linked; otherwise this
            is a Store-Conditional. */
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         /* Work on a shallow copy so the original IRDirty (and its
            args vector) is left untouched. */
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/BBPTR markers are not real expressions; leave
               them alone. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
2690
2691
/* Constant/copy propagation and folding over one (flat, SSA-form)
   IRSB.  Returns a new IRSB; |in| is not modified.  Also converts
   any IRStmt_LoadG whose guard folds to 1:I1 into an unconditional
   load-plus-conversion pair, via the fixups[] mechanism below. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   /* env maps IRTemp -> replacement IRExpr* (NULL = no binding). */
   IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env with which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs. The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment. This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* SSA form guarantees each tmp is assigned at most once. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  /* If the table is full we simply leave the LoadG
                     guarded; that is correct, just less optimal. */
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

      default:
         break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   /* Also propagate into the block-end next-address expression. */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
2851
2852
2853/*---------------------------------------------------------------*/
2854/*--- Dead code (t = E) removal                               ---*/
2855/*---------------------------------------------------------------*/
2856
2857/* As a side effect, also removes all code following an unconditional
2858   side exit. */
2859
2860/* The type of the HashHW map is: a map from IRTemp to nothing
2861   -- really just operating a set or IRTemps.
2862*/
2863
2864inline
2865static void addUses_Temp ( Bool* set, IRTemp tmp )
2866{
2867   set[(Int)tmp] = True;
2868}
2869
2870static void addUses_Expr ( Bool* set, IRExpr* e )
2871{
2872   Int i;
2873   switch (e->tag) {
2874      case Iex_GetI:
2875         addUses_Expr(set, e->Iex.GetI.ix);
2876         return;
2877      case Iex_ITE:
2878         addUses_Expr(set, e->Iex.ITE.cond);
2879         addUses_Expr(set, e->Iex.ITE.iftrue);
2880         addUses_Expr(set, e->Iex.ITE.iffalse);
2881         return;
2882      case Iex_CCall:
2883         for (i = 0; e->Iex.CCall.args[i]; i++)
2884            addUses_Expr(set, e->Iex.CCall.args[i]);
2885         return;
2886      case Iex_Load:
2887         addUses_Expr(set, e->Iex.Load.addr);
2888         return;
2889      case Iex_Qop:
2890         addUses_Expr(set, e->Iex.Qop.details->arg1);
2891         addUses_Expr(set, e->Iex.Qop.details->arg2);
2892         addUses_Expr(set, e->Iex.Qop.details->arg3);
2893         addUses_Expr(set, e->Iex.Qop.details->arg4);
2894         return;
2895      case Iex_Triop:
2896         addUses_Expr(set, e->Iex.Triop.details->arg1);
2897         addUses_Expr(set, e->Iex.Triop.details->arg2);
2898         addUses_Expr(set, e->Iex.Triop.details->arg3);
2899         return;
2900      case Iex_Binop:
2901         addUses_Expr(set, e->Iex.Binop.arg1);
2902         addUses_Expr(set, e->Iex.Binop.arg2);
2903         return;
2904      case Iex_Unop:
2905         addUses_Expr(set, e->Iex.Unop.arg);
2906         return;
2907      case Iex_RdTmp:
2908         addUses_Temp(set, e->Iex.RdTmp.tmp);
2909         return;
2910      case Iex_Const:
2911      case Iex_Get:
2912         return;
2913      default:
2914         vex_printf("\n");
2915         ppIRExpr(e);
2916         vpanic("addUses_Expr");
2917   }
2918}
2919
2920static void addUses_Stmt ( Bool* set, IRStmt* st )
2921{
2922   Int      i;
2923   IRDirty* d;
2924   IRCAS*   cas;
2925   switch (st->tag) {
2926      case Ist_AbiHint:
2927         addUses_Expr(set, st->Ist.AbiHint.base);
2928         addUses_Expr(set, st->Ist.AbiHint.nia);
2929         return;
2930      case Ist_PutI:
2931         addUses_Expr(set, st->Ist.PutI.details->ix);
2932         addUses_Expr(set, st->Ist.PutI.details->data);
2933         return;
2934      case Ist_WrTmp:
2935         addUses_Expr(set, st->Ist.WrTmp.data);
2936         return;
2937      case Ist_Put:
2938         addUses_Expr(set, st->Ist.Put.data);
2939         return;
2940      case Ist_Store:
2941         addUses_Expr(set, st->Ist.Store.addr);
2942         addUses_Expr(set, st->Ist.Store.data);
2943         return;
2944      case Ist_StoreG: {
2945         IRStoreG* sg = st->Ist.StoreG.details;
2946         addUses_Expr(set, sg->addr);
2947         addUses_Expr(set, sg->data);
2948         addUses_Expr(set, sg->guard);
2949         return;
2950      }
2951      case Ist_LoadG: {
2952         IRLoadG* lg = st->Ist.LoadG.details;
2953         addUses_Expr(set, lg->addr);
2954         addUses_Expr(set, lg->alt);
2955         addUses_Expr(set, lg->guard);
2956         return;
2957      }
2958      case Ist_CAS:
2959         cas = st->Ist.CAS.details;
2960         addUses_Expr(set, cas->addr);
2961         if (cas->expdHi)
2962            addUses_Expr(set, cas->expdHi);
2963         addUses_Expr(set, cas->expdLo);
2964         if (cas->dataHi)
2965            addUses_Expr(set, cas->dataHi);
2966         addUses_Expr(set, cas->dataLo);
2967         return;
2968      case Ist_LLSC:
2969         addUses_Expr(set, st->Ist.LLSC.addr);
2970         if (st->Ist.LLSC.storedata)
2971            addUses_Expr(set, st->Ist.LLSC.storedata);
2972         return;
2973      case Ist_Dirty:
2974         d = st->Ist.Dirty.details;
2975         if (d->mFx != Ifx_None)
2976            addUses_Expr(set, d->mAddr);
2977         addUses_Expr(set, d->guard);
2978         for (i = 0; d->args[i] != NULL; i++) {
2979            IRExpr* arg = d->args[i];
2980            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
2981               addUses_Expr(set, arg);
2982         }
2983         return;
2984      case Ist_NoOp:
2985      case Ist_IMark:
2986      case Ist_MBE:
2987         return;
2988      case Ist_Exit:
2989         addUses_Expr(set, st->Ist.Exit.guard);
2990         return;
2991      default:
2992         vex_printf("\n");
2993         ppIRStmt(st);
2994         vpanic("addUses_Stmt");
2995   }
2996}
2997
2998
2999/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
3000static Bool isZeroU1 ( IRExpr* e )
3001{
3002   return toBool( e->tag == Iex_Const
3003                  && e->Iex.Const.con->tag == Ico_U1
3004                  && e->Iex.Const.con->Ico.U1 == False );
3005}
3006
3007/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
3008static Bool isOneU1 ( IRExpr* e )
3009{
3010   return toBool( e->tag == Iex_Const
3011                  && e->Iex.Const.con->tag == Ico_U1
3012                  && e->Iex.Const.con->Ico.U1 == True );
3013}
3014
3015
3016/* Note, this destructively modifies the given IRSB. */
3017
3018/* Scan backwards through statements, carrying a set of IRTemps which
3019   are known to be used after the current point.  On encountering 't =
3020   E', delete the binding if it is not used.  Otherwise, add any temp
3021   uses to the set and keep on moving backwards.
3022
3023   As an enhancement, the first (backwards) pass searches for IR exits
3024   with always-taken conditions and notes the location of the earliest
3025   one in the block.  If any such are found, a second pass copies the
3026   exit destination and jump kind to the bb-end.  Then, the exit and
3027   all statements following it are turned into no-ops.
3028*/
3029
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   /* set[t] == True iff tmp t is used at or after the statement
      currently being examined (scanning backwards). */
   Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it.  Note:
             since the assignment is dropped, its uses are deliberately
             NOT added to |set|, which may make earlier assignments
             dead as well. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Redirect the block end to the exit's target ... */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      /* ... and no-op out the exit itself plus everything after it. */
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
3097
3098
3099/*---------------------------------------------------------------*/
3100/*--- Specialisation of helper function calls, in             ---*/
3101/*--- collaboration with the front end                        ---*/
3102/*---------------------------------------------------------------*/
3103
/* For each 't = CCall(helper, args)' statement in |bb|, offer the
   front end (via |specHelper|) the chance to replace the call with
   an inline IR expression.  Works backwards so |specHelper| may
   inspect the preceding statements.  Re-flattens the BB if any
   replacement was made, since replacements need not be flat. */
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      /* Only 't = CCall(...)' statements are candidates. */
      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
        /* the front end can't think of a suitable replacement */
        continue;

      /* We got something better.  Install it in the bb. */
      any = True;
      bb->stmts[i]
         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);

      if (0) {
         /* |st| still refers to the old statement, so this prints
            the original call alongside its replacement. */
         vex_printf("SPEC: ");
         ppIRExpr(st->Ist.WrTmp.data);
         vex_printf("  -->  ");
         ppIRExpr(ex);
         vex_printf("\n");
      }
   }

   if (any)
      bb = flatten_BB(bb);
   return bb;
}
3147
3148
3149/*---------------------------------------------------------------*/
3150/*--- Determination of guest state aliasing relationships     ---*/
3151/*---------------------------------------------------------------*/
3152
3153/* These are helper functions for CSE and GetI/PutI transformations.
3154
3155   Determine, to the extent possible, the relationship between two
3156   guest state accesses.  The possible outcomes are:
3157
3158   * Exact alias.  These two accesses denote precisely the same
3159     piece of the guest state.
3160
3161   * Definitely no alias.  These two accesses are guaranteed not to
3162     overlap any part of the guest state.
3163
3164   * Unknown -- if neither of the above can be established.
3165
3166   If in doubt, return Unknown.  */
3167
/* Verdict of comparing two guest state accesses; see the comment
   above for the meaning of each outcome. */
typedef
   enum { ExactAlias, NoAlias, UnknownAlias }
   GSAliasing;
3171
3172
3173/* Produces the alias relation between an indexed guest
3174   state access and a non-indexed access. */
3175
static
GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
                                    Int offset2, IRType ty2 )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;

   /* Compute the inclusive byte ranges touched by each access.
      NOTE(review): |ix1| is unused here -- the indexed access is
      treated conservatively as possibly touching its entire array;
      presumably the parameter is kept for signature symmetry with
      getAliasingRelation_II. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   minoff2 = offset2;
   maxoff2 = minoff2 + sizeofIRType(ty2) - 1;

   /* Disjoint byte ranges cannot alias. */
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* Could probably do better here if required.  For the moment
      however just claim not to know anything more. */
   return UnknownAlias;
}
3193
3194
3195/* Produces the alias relation between two indexed guest state
3196   accesses. */
3197
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   iters = 0;
   /* Make both biases non-negative by repeatedly adding the array
      size to both (which preserves their difference mod nElems).
      C's % on negative operands is implementation-unfriendly, hence
      this loop rather than a direct modulo. */
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
3260
3261
3262/*---------------------------------------------------------------*/
3263/*--- Common Subexpression Elimination                        ---*/
3264/*---------------------------------------------------------------*/
3265
3266/* Expensive in time and space. */
3267
3268/* Uses two environments:
3269   a IRTemp -> IRTemp mapping
3270   a mapping from AvailExpr* to IRTemp
3271*/
3272
/* A value that is either an IRTemp or an IRConst*; used as the
   canonical form of atom arguments when hashing/comparing
   expressions for CSE. */
typedef
   struct {
      enum { TCc, TCt } tag;   /* TCc => u.con valid; TCt => u.tmp valid */
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
3279
3280static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3281{
3282   if (tc1->tag != tc2->tag)
3283      return False;
3284   switch (tc1->tag) {
3285      case TCc:
3286         return eqIRConst(tc1->u.con, tc2->u.con);
3287      case TCt:
3288         return tc1->u.tmp == tc2->u.tmp;
3289      default:
3290         vpanic("eqTmpOrConst");
3291   }
3292}
3293
3294static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3295{
3296   Bool eq = cee1->addr == cee2->addr;
3297   if (eq) {
3298      vassert(cee1->regparms == cee2->regparms);
3299      vassert(cee1->mcx_mask == cee2->mcx_mask);
3300      /* Names should be the same too, but we don't bother to
3301         check. */
3302   }
3303   return eq;
3304}
3305
3306/* Convert a NULL terminated IRExpr* vector to an array of
3307   TmpOrConsts, and a length. */
3308static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3309                                       /*OUT*/Int* nOuts,
3310                                       IRExpr** ins )
3311{
3312   Int i, n;
3313   /* We have to make two passes, one to count, one to copy. */
3314   for (n = 0; ins[n]; n++)
3315      ;
3316   *outs  = LibVEX_Alloc(n * sizeof(TmpOrConst));
3317   *nOuts = n;
3318   /* and now copy .. */
3319   for (i = 0; i < n; i++) {
3320      IRExpr*       arg = ins[i];
3321      TmpOrConst* dst = &(*outs)[i];
3322      if (arg->tag == Iex_RdTmp) {
3323         dst->tag   = TCt;
3324         dst->u.tmp = arg->Iex.RdTmp.tmp;
3325      }
3326      else if (arg->tag == Iex_Const) {
3327         dst->tag   = TCc;
3328         dst->u.con = arg->Iex.Const.con;
3329      }
3330      else {
3331         /* Failure of this is serious; it means that the presented arg
3332            isn't an IR atom, as it should be. */
3333         vpanic("irExprVec_to_TmpOrConsts");
3334      }
3335   }
3336}
3337
/* An "available expression": a restricted form of IRExpr in which
   every leaf position is either a temporary or a constant -- the only
   shapes that CSE tracks.  Note that constants (con2, con1, con0) are
   stored by value, not by pointer, so an AvailExpr is self-contained
   except for the CCall callee/args and the GetIt descr, which are
   shared with (or copied from) the originating IRExpr. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
      } u;
   }
   AvailExpr;
3411
/* Structural equality on AvailExprs: True iff |a1| and |a2| denote
   the same computation.  For CCall, callees are compared by code
   address (eqIRCallee) and arg vectors element-wise. */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   /* Differing tags can never match; when scanning the aenv this is
      the common case, hence LIKELY. */
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt:
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         Int  i, n;
         /* Cheap checks (arity, callee address) first; only then
            compare the args pairwise. */
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         /* Equal calls must have equal return types. */
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      default: vpanic("eq_AvailExpr");
   }
}
3478
/* Convert an AvailExpr back into a proper IRExpr tree.  Constants
   held by value inside the AvailExpr are re-boxed into freshly
   allocated IRConsts; the CCall callee is shared (it is immutable). */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
                           IRExpr_RdTmp(ae->u.Ittt.e1),
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc(sizeof(IRConst));
         con1 = LibVEX_Alloc(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         /* Rebuild a NULL-terminated IRExpr* vector from the
            TmpOrConst array. */
         IRExpr** vec = LibVEX_Alloc((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            TmpOrConst* tc = &ae->u.CCall.args[i];
            if (tc->tag == TCc) {
               vec[i] = IRExpr_Const(tc->u.con);
            }
            else if (tc->tag == TCt) {
               vec[i] = IRExpr_RdTmp(tc->u.tmp);
            }
            else vpanic("availExpr_to_IRExpr:CCall-arg");
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
3555
3556inline
3557static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3558{
3559   HWord res;
3560   /* env :: IRTemp -> IRTemp */
3561   if (lookupHHW( env, &res, (HWord)tmp ))
3562      return (IRTemp)res;
3563   else
3564      return tmp;
3565}
3566
3567static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
3568{
3569   /* env :: IRTemp -> IRTemp */
3570   switch (ae->tag) {
3571      case Ut:
3572         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
3573         break;
3574      case Btt:
3575         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
3576         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
3577         break;
3578      case Btc:
3579         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
3580         break;
3581      case Bct:
3582         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
3583         break;
3584      case Cf64i:
3585         break;
3586      case Ittt:
3587         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
3588         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
3589         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
3590         break;
3591      case Ittc:
3592         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
3593         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
3594         break;
3595      case Itct:
3596         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
3597         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
3598         break;
3599      case Itcc:
3600         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
3601         break;
3602      case GetIt:
3603         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
3604         break;
3605      case CCall: {
3606         Int i, n = ae->u.CCall.nArgs;;
3607         for (i = 0; i < n; i++) {
3608            TmpOrConst* tc = &ae->u.CCall.args[i];
3609            if (tc->tag == TCt) {
3610               tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3611            }
3612         }
3613         break;
3614      }
3615      default:
3616         vpanic("subst_AvailExpr");
3617   }
3618}
3619
/* Try to convert |e| into a freshly allocated AvailExpr.  Returns
   NULL if |e| is not one of the shapes CSE handles -- that is, if
   any leaf position is neither a temporary nor a constant. */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         /* Dispatch on the (tmp|const, tmp|const) shape of the two
            args.  Note (const,const) is not handled; constant folding
            should already have removed such cases. */
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag        = Bct;
            ae->u.Bct.op   = e->Iex.Binop.op;
            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
            return ae;
         }
         break;

      case Iex_Const:
         /* Only F64i constants are tracked; they can't be handled by
            ordinary constant folding. */
         if (e->Iex.Const.con->tag == Ico_F64i) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag          = Cf64i;
            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
            return ae;
         }
         break;

      case Iex_ITE:
         /* The condition must be a tmp; the two arms may each be
            tmp or const, giving the four Itxx variants. */
         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Ittt;
                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Itct;
                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Ittc;
                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc(sizeof(AvailExpr));
                  ae->tag       = Itcc;
                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
            }
         }
         break;

      case Iex_GetI:
         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc(sizeof(AvailExpr));
            ae->tag           = GetIt;
            ae->u.GetIt.descr = e->Iex.GetI.descr;
            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
            ae->u.GetIt.bias  = e->Iex.GetI.bias;
            return ae;
         }
         break;

      case Iex_CCall:
         ae = LibVEX_Alloc(sizeof(AvailExpr));
         ae->tag = CCall;
         /* Ok to share only the cee, since it is immutable. */
         ae->u.CCall.cee   = e->Iex.CCall.cee;
         ae->u.CCall.retty = e->Iex.CCall.retty;
         /* irExprVec_to_TmpOrConsts will assert if the args are
            neither tmps nor constants, but that's ok .. that's all they
            should be. */
         irExprVec_to_TmpOrConsts(
                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
                                  e->Iex.CCall.args
                                 );
         return ae;

      default:
         break;
   }

   return NULL;
}
3745
3746
3747/* The BB is modified in-place.  Returns True if any changes were
3748   made. */
3749
/* Common subexpression elimination over one superblock, in place.
   Returns True iff any statement was changed.  See the big comment
   in the loop below for the algorithm. */
static Bool do_cse_BB ( IRSB* bb )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* IRTemps are stored as HWord keys/values in the hash tables. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI kind.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         /* Scan all live aenv entries, killing any GetI binding this
            statement might invalidate. */
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            if (ae->tag != GetIt)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               /* Dirty/Store/etc: assume the worst. */
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
3900
3901
3902/*---------------------------------------------------------------*/
3903/*--- Add32/Sub32 chain collapsing                            ---*/
3904/*---------------------------------------------------------------*/
3905
3906/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
3907
3908/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
3909   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
3910   root node is Add32, not Sub32. */
3911
3912static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
3913{
3914   if (e->tag != Iex_Binop)
3915      return False;
3916   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
3917      return False;
3918   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
3919      return False;
3920   if (e->Iex.Binop.arg2->tag != Iex_Const)
3921      return False;
3922   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3923   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
3924   if (e->Iex.Binop.op == Iop_Sub32)
3925      *i32 = -*i32;
3926   return True;
3927}
3928
3929
3930/* Figure out if tmp can be expressed as tmp2 +32 const, for some
3931   other tmp2.  Scan backwards from the specified start point -- an
3932   optimisation. */
3933
/* Figure out if tmp can be expressed as tmp2 +32 const, for some
   other tmp2.  Scan backwards from bb->stmts[startHere].  Returns
   True and fills in *tmp2/*i32 iff a non-trivial chain was found. */
static Bool collapseChain ( IRSB* bb, Int startHere,
                            IRTemp tmp,
                            IRTemp* tmp2, Int* i32 )
{
   Int     j, ii;
   IRTemp  vv;
   IRStmt* st;
   IRExpr* e;

   /* the (var, con) pair contain the current 'representation' for
      'tmp'.  We start with 'tmp + 0'.  */
   IRTemp var = tmp;
   Int    con = 0;

   /* Scan backwards to see if tmp can be replaced by some other tmp
     +/- a constant.  Each time var's defining WrTmp turns out to be
     another Add32/Sub32(tmp,const), fold the constant into con and
     chase the new tmp. */
   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag != Ist_WrTmp)
         continue;
      if (st->Ist.WrTmp.tmp != var)
         continue;
      e = st->Ist.WrTmp.data;
      if (!isAdd32OrSub32(e, &vv, &ii))
         break;
      var = vv;
      con += ii;
   }
   /* If the loop ran off the start without ever finding var's
      binding, the IR is ill-formed (every tmp this pass chases is
      expected to be bound by an earlier WrTmp). */
   if (j == -1)
      /* no earlier binding for var .. ill-formed IR */
      vpanic("collapseChain");

   /* so, did we find anything interesting? */
   if (var == tmp)
      return False; /* no .. */

   *tmp2 = var;
   *i32  = con;
   return True;
}
3974
3975
3976/* ------- Main function for Add32/Sub32 chain collapsing ------ */
3977
3978static void collapse_AddSub_chains_BB ( IRSB* bb )
3979{
3980   IRStmt *st;
3981   IRTemp var, var2;
3982   Int    i, con, con2;
3983
3984   for (i = bb->stmts_used-1; i >= 0; i--) {
3985      st = bb->stmts[i];
3986      if (st->tag == Ist_NoOp)
3987         continue;
3988
3989      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
3990
3991      if (st->tag == Ist_WrTmp
3992          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
3993
3994         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
3995            Find out if var can be expressed as var2 + con2. */
3996         if (collapseChain(bb, i-1, var, &var2, &con2)) {
3997            if (DEBUG_IROPT) {
3998               vex_printf("replacing1 ");
3999               ppIRStmt(st);
4000               vex_printf(" with ");
4001            }
4002            con2 += con;
4003            bb->stmts[i]
4004               = IRStmt_WrTmp(
4005                    st->Ist.WrTmp.tmp,
4006                    (con2 >= 0)
4007                      ? IRExpr_Binop(Iop_Add32,
4008                                     IRExpr_RdTmp(var2),
4009                                     IRExpr_Const(IRConst_U32(con2)))
4010                      : IRExpr_Binop(Iop_Sub32,
4011                                     IRExpr_RdTmp(var2),
4012                                     IRExpr_Const(IRConst_U32(-con2)))
4013                 );
4014            if (DEBUG_IROPT) {
4015               ppIRStmt(bb->stmts[i]);
4016               vex_printf("\n");
4017            }
4018         }
4019
4020         continue;
4021      }
4022
4023      /* Try to collapse 't1 = GetI[t2, con]'. */
4024
4025      if (st->tag == Ist_WrTmp
4026          && st->Ist.WrTmp.data->tag == Iex_GetI
4027          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
4028          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
4029                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
4030         if (DEBUG_IROPT) {
4031            vex_printf("replacing3 ");
4032            ppIRStmt(st);
4033            vex_printf(" with ");
4034         }
4035         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
4036         bb->stmts[i]
4037            = IRStmt_WrTmp(
4038                 st->Ist.WrTmp.tmp,
4039                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
4040                             IRExpr_RdTmp(var2),
4041                             con2));
4042         if (DEBUG_IROPT) {
4043            ppIRStmt(bb->stmts[i]);
4044            vex_printf("\n");
4045         }
4046         continue;
4047      }
4048
4049      /* Perhaps st is PutI[t, con] ? */
4050      IRPutI *puti = st->Ist.PutI.details;
4051      if (st->tag == Ist_PutI
4052          && puti->ix->tag == Iex_RdTmp
4053          && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
4054                               &var2, &con2)) {
4055         if (DEBUG_IROPT) {
4056            vex_printf("replacing2 ");
4057            ppIRStmt(st);
4058            vex_printf(" with ");
4059         }
4060         con2 += puti->bias;
4061         bb->stmts[i]
4062            = IRStmt_PutI(mkIRPutI(puti->descr,
4063                                   IRExpr_RdTmp(var2),
4064                                   con2,
4065                                   puti->data));
4066         if (DEBUG_IROPT) {
4067            ppIRStmt(bb->stmts[i]);
4068            vex_printf("\n");
4069         }
4070         continue;
4071      }
4072
4073   } /* for */
4074}
4075
4076
4077/*---------------------------------------------------------------*/
4078/*--- PutI/GetI transformations                               ---*/
4079/*---------------------------------------------------------------*/
4080
4081/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4082   the given starting point to find, if any, a PutI which writes
4083   exactly the same piece of guest state, and so return the expression
4084   that the PutI writes.  This is the core of PutI-GetI forwarding. */
4085
/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
   the given starting point to find, if any, a PutI which writes
   exactly the same piece of guest state, and so return the expression
   that the PutI writes.  This is the core of PutI-GetI forwarding.
   Returns NULL when no safe forwarding is possible. */
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
4177
4178
4179
4180/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4181   that it writes exactly the same piece of guest state) ?  Safe
4182   answer: False. */
4183
4184static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4185{
4186   vassert(pi->tag == Ist_PutI);
4187   if (s2->tag != Ist_PutI)
4188      return False;
4189
4190   IRPutI *p1 = pi->Ist.PutI.details;
4191   IRPutI *p2 = s2->Ist.PutI.details;
4192
4193   return toBool(
4194          getAliasingRelation_II(
4195             p1->descr, p1->ix, p1->bias,
4196             p2->descr, p2->ix, p2->bias
4197          )
4198          == ExactAlias
4199          );
4200}
4201
4202
4203/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4204   overlap it?  Safe answer: True.  Note, we could do a lot better
4205   than this if needed. */
4206
4207static
4208Bool guestAccessWhichMightOverlapPutI (
4209        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
4210     )
4211{
4212   GSAliasing relation;
4213   UInt       minoffP, maxoffP;
4214
4215   vassert(pi->tag == Ist_PutI);
4216
4217   IRPutI *p1 = pi->Ist.PutI.details;
4218
4219   getArrayBounds(p1->descr, &minoffP, &maxoffP);
4220   switch (s2->tag) {
4221
4222      case Ist_NoOp:
4223      case Ist_IMark:
4224         return False;
4225
4226      case Ist_MBE:
4227      case Ist_AbiHint:
4228         /* just be paranoid ... these should be rare. */
4229         return True;
4230
4231      case Ist_CAS:
4232         /* This is unbelievably lame, but it's probably not
4233            significant from a performance point of view.  Really, a
4234            CAS is a load-store op, so it should be safe to say False.
4235            However .. */
4236         return True;
4237
4238      case Ist_Dirty:
4239         /* If the dirty call has any guest effects at all, give up.
4240            Probably could do better. */
4241         if (s2->Ist.Dirty.details->nFxState > 0)
4242            return True;
4243         return False;
4244
4245      case Ist_Put:
4246         vassert(isIRAtom(s2->Ist.Put.data));
4247         relation
4248            = getAliasingRelation_IC(
4249                 p1->descr, p1->ix,
4250                 s2->Ist.Put.offset,
4251                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
4252              );
4253         goto have_relation;
4254
4255      case Ist_PutI: {
4256         IRPutI *p2 = s2->Ist.PutI.details;
4257
4258         vassert(isIRAtom(p2->ix));
4259         vassert(isIRAtom(p2->data));
4260         relation
4261            = getAliasingRelation_II(
4262                 p1->descr, p1->ix, p1->bias,
4263                 p2->descr, p2->ix, p2->bias
4264              );
4265         goto have_relation;
4266      }
4267
4268      case Ist_WrTmp:
4269         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
4270            relation
4271               = getAliasingRelation_II(
4272                    p1->descr, p1->ix, p1->bias,
4273                    s2->Ist.WrTmp.data->Iex.GetI.descr,
4274                    s2->Ist.WrTmp.data->Iex.GetI.ix,
4275                    s2->Ist.WrTmp.data->Iex.GetI.bias
4276                 );
4277            goto have_relation;
4278         }
4279         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
4280            relation
4281               = getAliasingRelation_IC(
4282                    p1->descr, p1->ix,
4283                    s2->Ist.WrTmp.data->Iex.Get.offset,
4284                    s2->Ist.WrTmp.data->Iex.Get.ty
4285                 );
4286            goto have_relation;
4287         }
4288         return False;
4289
4290      case Ist_Store:
4291         vassert(isIRAtom(s2->Ist.Store.addr));
4292         vassert(isIRAtom(s2->Ist.Store.data));
4293         return False;
4294
4295      default:
4296         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
4297         vpanic("guestAccessWhichMightOverlapPutI");
4298   }
4299
4300  have_relation:
4301   if (relation == NoAlias)
4302      return False;
4303   else
4304      return True; /* ExactAlias or UnknownAlias */
4305}
4306
4307
4308
4309/* ---------- PutI/GetI transformations main functions --------- */
4310
4311/* Remove redundant GetIs, to the extent that they can be detected.
4312   bb is modified in-place. */
4313
4314static
4315void do_redundant_GetI_elimination ( IRSB* bb )
4316{
4317   Int     i;
4318   IRStmt* st;
4319
4320   for (i = bb->stmts_used-1; i >= 0; i--) {
4321      st = bb->stmts[i];
4322      if (st->tag == Ist_NoOp)
4323         continue;
4324
4325      if (st->tag == Ist_WrTmp
4326          && st->Ist.WrTmp.data->tag == Iex_GetI
4327          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4328         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4329         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
4330         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
4331         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
4332         if (replacement
4333             && isIRAtom(replacement)
4334             /* Make sure we're doing a type-safe transformation! */
4335             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4336            if (DEBUG_IROPT) {
4337               vex_printf("rGI:  ");
4338               ppIRExpr(st->Ist.WrTmp.data);
4339               vex_printf(" -> ");
4340               ppIRExpr(replacement);
4341               vex_printf("\n");
4342            }
4343            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4344         }
4345      }
4346   }
4347
4348}
4349
4350
4351/* Remove redundant PutIs, to the extent which they can be detected.
4352   bb is modified in-place. */
4353
4354static
4355void do_redundant_PutI_elimination ( IRSB* bb )
4356{
4357   Int    i, j;
4358   Bool   delete;
4359   IRStmt *st, *stj;
4360
4361   vassert(vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn);
4362
4363   for (i = 0; i < bb->stmts_used; i++) {
4364      st = bb->stmts[i];
4365      if (st->tag != Ist_PutI)
4366         continue;
4367      /* Ok, search forwards from here to see if we can find another
4368         PutI which makes this one redundant, and dodging various
4369         hazards.  Search forwards:
4370         * If conditional exit, give up (because anything after that
4371           does not postdominate this put).
4372         * If a Get which might overlap, give up (because this PutI
4373           not necessarily dead).
4374         * If a Put which is identical, stop with success.
4375         * If a Put which might overlap, but is not identical, give up.
4376         * If a dirty helper call which might write guest state, give up.
4377         * If a Put which definitely doesn't overlap, or any other
4378           kind of stmt, continue.
4379      */
4380      delete = False;
4381      for (j = i+1; j < bb->stmts_used; j++) {
4382         stj = bb->stmts[j];
4383         if (stj->tag == Ist_NoOp)
4384            continue;
4385         if (identicalPutIs(st, stj)) {
4386            /* success! */
4387            delete = True;
4388            break;
4389         }
4390         if (stj->tag == Ist_Exit)
4391            /* give up */
4392            break;
4393         if (st->tag == Ist_Dirty)
4394            /* give up; could do better here */
4395            break;
4396         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4397            /* give up */
4398           break;
4399      }
4400
4401      if (delete) {
4402         if (DEBUG_IROPT) {
4403            vex_printf("rPI:  ");
4404            ppIRStmt(st);
4405            vex_printf("\n");
4406         }
4407         bb->stmts[i] = IRStmt_NoOp();
4408      }
4409
4410   }
4411}
4412
4413
4414/*---------------------------------------------------------------*/
4415/*--- Loop unrolling                                          ---*/
4416/*---------------------------------------------------------------*/
4417
4418/* Adjust all tmp values (names) in e by delta.  e is destructively
4419   modified. */
4420
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         /* The only expression kind that directly names a temp. */
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         /* Leaves: mention no temps. */
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      default:
         /* Unhandled expression kind: print it and abort. */
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
4469
4470/* Adjust all tmp values (names) in st by delta.  st is destructively
4471   modified. */
4472
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No temps mentioned. */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* Renumber the written (lvalue) temp as well as any temps
            read by the rhs. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         /* The destination is a constant; only the guard can
            mention temps. */
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* dst is a temp (the load target), not an expression. */
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* oldHi/expdHi/dataHi are only present for double-width CAS. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata is NULL for a load-linked. */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR are placeholder markers, not real
               expressions; skip them. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         /* Unhandled statement kind: print it and abort. */
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
4555
4556
4557/* If possible, return a loop-unrolled version of bb0.  The original
4558   is changed.  If not possible, return NULL.  */
4559
4560/* The two schemas considered are:
4561
4562     X: BODY; goto X
4563
4564     which unrolls to (eg)  X: BODY;BODY; goto X
4565
4566   and
4567
4568       X: BODY; if (c) goto X; goto Y
4569   which trivially transforms to
4570       X: BODY; if (!c) goto Y; goto X;
4571   so it falls in the scope of the first case.
4572
4573   X and Y must be literal (guest) addresses.
4574*/
4575
4576static Int calc_unroll_factor( IRSB* bb )
4577{
4578   Int n_stmts, i;
4579
4580   n_stmts = 0;
4581   for (i = 0; i < bb->stmts_used; i++) {
4582      if (bb->stmts[i]->tag != Ist_NoOp)
4583         n_stmts++;
4584   }
4585
4586   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4587      if (vex_control.iropt_verbosity > 0)
4588         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4589                    n_stmts, 8* n_stmts);
4590      return 8;
4591   }
4592   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4593      if (vex_control.iropt_verbosity > 0)
4594         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4595                    n_stmts, 4* n_stmts);
4596      return 4;
4597   }
4598
4599   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4600      if (vex_control.iropt_verbosity > 0)
4601         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4602                    n_stmts, 2* n_stmts);
4603      return 2;
4604   }
4605
4606   if (vex_control.iropt_verbosity > 0)
4607      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4608
4609   return 1;
4610}
4611
4612
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* Unrolling can be disabled entirely via the threshold knob. */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends in a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last statement slot actually holding a statement. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around: the fall-through
      destination becomes the loop head and the side exit becomes
      the loop exit. */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Each pass appends a renumbered copy of bb1 to itself, doubling
      the body: 1, 2 or 3 doublings for factors 2, 4 and 8. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      /* Reserve a fresh set of temps in bb1 for the copy's temps. */
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%llx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
4783
4784
4785/*---------------------------------------------------------------*/
4786/*--- The tree builder                                        ---*/
4787/*---------------------------------------------------------------*/
4788
4789/* This isn't part of IR optimisation.  Really it's a pass done prior
4790   to instruction selection, which improves the code that the
4791   instruction selector can produce. */
4792
4793/* --- The 'tmp' environment is the central data structure here --- */
4794
/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  A size in the region of 8-10
   gives reasonable performance under most circumstances. */
4800#define A_NENV 10
4801
4802/* An interval. Used to record the bytes in the guest state accessed
4803   by a Put[I] statement or by (one or more) Get[I] expression(s). In
4804   case of several Get[I] expressions, the lower/upper bounds are recorded.
4805   This is conservative but cheap.
4806   E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
4807   E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
4808   offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;   /* True iff low/high hold a valid interval */
      Int  low;       /* inclusive lower guest-state byte offset */
      Int  high;      /* inclusive upper guest-state byte offset */
   }
   Interval;
4816
4817static inline Bool
4818intervals_overlap(Interval i1, Interval i2)
4819{
4820   return (i1.low >= i2.low && i1.low <= i2.high) ||
4821          (i2.low >= i1.low && i2.low <= i1.high);
4822}
4823
4824static inline void
4825update_interval(Interval *i, Int low, Int high)
4826{
4827   vassert(low <= high);
4828
4829   if (i->present) {
4830      if (low  < i->low)  i->low  = low;
4831      if (high > i->high) i->high = high;
4832   } else {
4833      i->present = True;
4834      i->low  = low;
4835      i->high = high;
4836   }
4837}
4838
4839
4840/* bindee == NULL   ===  slot is not in use
4841   bindee != NULL   ===  slot is in use
4842*/
typedef
   struct {
      IRTemp  binder;      /* the temp this slot binds */
      IRExpr* bindee;      /* the bound expression; NULL == slot free */
      Bool    doesLoad;    /* does BINDEE read memory? (set later) */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
4852
4853__attribute__((unused))
4854static void ppAEnv ( ATmpInfo* env )
4855{
4856   Int i;
4857   for (i = 0; i < A_NENV; i++) {
4858      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
4859      if (env[i].bindee)
4860         ppIRExpr(env[i].bindee);
4861      else
4862         vex_printf("(null)");
4863      vex_printf("\n");
4864   }
4865}
4866
4867/* --- Tree-traversal fns --- */
4868
4869/* Traverse an expr, and detect if any part of it reads memory or does
4870   a Get.  Be careful ... this really controls how much the
4871   tree-builder can reorder the code, so getting it right is critical.
4872*/
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         /* A memory read anywhere in the tree sets the load hint. */
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         /* Fixed offset: record the exact byte range read. */
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         /* Index not statically known: conservatively record the
            whole array's extent, then descend into the index expr. */
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         /* Neither loads nor reads guest state. */
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
4931
4932
4933/* Add a binding to the front of the env and slide all the rest
4934   backwards.  It should be the case that the last slot is free. */
4935static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
4936{
4937   Int i;
4938   vassert(env[A_NENV-1].bindee == NULL);
4939   for (i = A_NENV-1; i >= 1; i--)
4940      env[i] = env[i-1];
4941   env[0].binder   = binder;
4942   env[0].bindee   = bindee;
4943   env[0].doesLoad = False; /* filled in later */
4944   env[0].getInterval.present  = False; /* filled in later */
4945   env[0].getInterval.low  = -1; /* filled in later */
4946   env[0].getInterval.high = -1; /* filled in later */
4947}
4948
4949/* Given uses :: array of UShort, indexed by IRTemp
4950   Add the use-occurrences of temps in this expression
4951   to the env.
4952*/
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int i;

   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_Const:
      case Iex_Get:
         /* Leaves: mention no temps. */
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
    }
}
5013
5014
5015/* Given uses :: array of UShort, indexed by IRTemp
5016   Add the use-occurrences of temps in this statement
5017   to the env.
5018*/
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         /* Only rhs reads count; writing WrTmp.tmp is not a use. */
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* lg->dst is written, not read, so it is not counted. */
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         /* expdHi/dataHi are only present for double-width CAS. */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         /* storedata is NULL for a load-linked. */
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         /* mAddr is only meaningful when a memory effect is declared. */
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR are placeholder markers, not real exprs. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No temps mentioned. */
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
5095
5096/* Look up a binding for tmp in the env.  If found, return the bound
5097   expression, and set the env's binding to NULL so it is marked as
5098   used.  If not found, return NULL. */
5099
5100static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5101{
5102   Int i;
5103   for (i = 0; i < A_NENV; i++) {
5104      if (env[i].binder == tmp && env[i].bindee != NULL) {
5105         IRExpr* bindee = env[i].bindee;
5106         env[i].bindee = NULL;
5107         return bindee;
5108      }
5109   }
5110   return NULL;
5111}
5112
5113/* Traverse e, looking for temps.  For each observed temp, see if env
5114   contains a binding for the temp, and if so return the bound value.
5115   The env has the property that any binding it holds is
5116   'single-shot', so once a binding is used, it is marked as no longer
5117   available, by setting its .bindee field to NULL. */
5118
5119static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5120   return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5121}
5122static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5123   return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5124}
5125
5126static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
5127{
5128   switch (op) {
5129   case Iop_Or32:
5130      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
5131      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
5132         return IRExpr_Unop( Iop_CmpwNEZ32,
5133                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
5134                                                     a2->Iex.Unop.arg ) );
5135      break;
5136
5137   case Iop_CmpNE32:
5138      /* Since X has type Ity_I1 we can simplify:
5139         CmpNE32(1Uto32(X),0)) ==> X */
5140      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
5141         return a1->Iex.Unop.arg;
5142      break;
5143
5144   default:
5145      break;
5146   }
5147   /* no reduction rule applies */
5148   return IRExpr_Binop( op, a1, a2 );
5149}
5150
/* Apply sound peephole reductions to Unop(op, aa).  These mostly
   collapse chains of condition-code computations (CmpNEZ*, CmpwNEZ*,
   Left*) and widen/narrow pairs.  A couple of the CmpwNEZ64 rules
   recurse via fold_IRExpr_Unop so that one reduction can expose
   another.  If no rule matches, the expression is rebuilt unchanged. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_Left64:
      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
                     --> Shr64( 32Uto64(64to32(x)), sh )) */
      /* The shifted value already has its upper 32 bits zero, so the
         outer re-widening is a no-op and the inner tree is returned
         directly. */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
         --> 32Uto64(64to32( Shl64(                x,   sh )) */
      /* The surrounding 32Uto64(64to32(..)) masking makes the inner
         pre-shift masking of x redundant, so it is dropped. */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
5294
/* Substitute env bindings into 'e', rebuilding the expression
   bottom-up.  RdTmp nodes are replaced by their (single-shot) bound
   expression where one exists.  Rebuilt Binop/Unop nodes are passed
   through fold_IRExpr_Binop/fold_IRExpr_Unop to catch reductions
   exposed by the substitution.  Atoms (Const, Get) come back
   unchanged. */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         /* Copy the arg vector first, so the original tree (which may
            be shared) is not mutated. */
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         /* NULL from atbSubst_Temp means "no binding"; keep 'e'. */
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
                );
      case Iex_Binop:
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
5367
5368/* Same deal as atbSubst_Expr, except for stmts. */
5369
/* Rebuild 'st' with atbSubst_Expr applied to every expression it
   contains.  Statement kinds with no contained expressions (NoOp,
   IMark, MBE) are simply reconstructed.  Unknown tags panic. */
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* expdHi/dataHi are optional (NULL for single CAS); only
            substitute into them when present. */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Shallow-copy the dirty call, then substitute in the copy,
            leaving the (possibly shared) original untouched. */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
5474
5475inline
5476static Bool dirty_helper_stores ( const IRDirty *d )
5477{
5478   return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
5479}
5480
5481inline
5482static Interval dirty_helper_puts ( const IRDirty *d,
5483                                    Bool (*preciseMemExnsFn)(Int, Int),
5484                                    Bool *requiresPreciseMemExns )
5485{
5486   Int i;
5487   Interval interval;
5488
5489   /* Passing the guest state pointer opens the door to modifying the
5490      guest state under the covers.  It's not allowed, but let's be
5491      extra conservative and assume the worst. */
5492   for (i = 0; d->args[i]; i++) {
5493      if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
5494         *requiresPreciseMemExns = True;
5495         /* Assume all guest state is written. */
5496         interval.present = True;
5497         interval.low  = 0;
5498         interval.high = 0x7FFFFFFF;
5499         return interval;
5500      }
5501   }
5502
5503   /* Check the side effects on the guest state */
5504   interval.present = False;
5505   interval.low = interval.high = -1;
5506   *requiresPreciseMemExns = False;
5507
5508   for (i = 0; i < d->nFxState; ++i) {
5509      if (d->fxState[i].fx != Ifx_Read) {
5510         Int offset = d->fxState[i].offset;
5511         Int size = d->fxState[i].size;
5512         Int nRepeats = d->fxState[i].nRepeats;
5513         Int repeatLen = d->fxState[i].repeatLen;
5514
5515         if (preciseMemExnsFn(offset,
5516                              offset + nRepeats * repeatLen + size - 1)) {
5517            *requiresPreciseMemExns = True;
5518         }
5519         update_interval(&interval, offset,
5520                         offset + nRepeats * repeatLen + size - 1);
5521      }
5522   }
5523
5524   return interval;
5525}
5526
5527/* Return an interval if st modifies the guest state. Via requiresPreciseMemExns
5528   return whether or not that modification requires precise exceptions. */
5529static Interval stmt_modifies_guest_state ( IRSB *bb, const IRStmt *st,
5530                                            Bool (*preciseMemExnsFn)(Int,Int),
5531                                            Bool *requiresPreciseMemExns )
5532{
5533   Interval interval;
5534
5535   switch (st->tag) {
5536   case Ist_Put: {
5537      Int offset = st->Ist.Put.offset;
5538      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
5539
5540      *requiresPreciseMemExns = preciseMemExnsFn(offset, offset + size - 1);
5541      interval.present = True;
5542      interval.low  = offset;
5543      interval.high = offset + size - 1;
5544      return interval;
5545   }
5546
5547   case Ist_PutI: {
5548      IRRegArray *descr = st->Ist.PutI.details->descr;
5549      Int offset = descr->base;
5550      Int size = sizeofIRType(descr->elemTy);
5551
5552      /* We quietly assume here that all segments are contiguous and there
5553         are no holes. This is to avoid a loop. The assumption is conservative
5554         in the sense that we might report that precise memory exceptions are
5555         needed when in fact they are not. */
5556      *requiresPreciseMemExns =
5557         preciseMemExnsFn(offset, offset + descr->nElems * size - 1);
5558      interval.present = True;
5559      interval.low  = offset;
5560      interval.high = offset + descr->nElems * size - 1;
5561      return interval;
5562   }
5563
5564   case Ist_Dirty:
5565      return dirty_helper_puts(st->Ist.Dirty.details, preciseMemExnsFn,
5566                               requiresPreciseMemExns);
5567
5568   default:
5569      *requiresPreciseMemExns = False;
5570      interval.present = False;
5571      interval.low  = -1;
5572      interval.high = -1;
5573      return interval;
5574   }
5575}
5576
/* Tree-builder: for each 'tmp = E' in bb where tmp is used exactly
   once, delete the assignment and substitute E directly into tmp's
   sole use site, subject to the invalidation rules enforced below
   (loads may not move past stores, Gets not past overlapping Puts,
   etc).  bb is updated in place.  Returns the maximum guest address
   covered by the block's IMarks, or ~0 if no IMark was seen. */
/* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb,
                                          Bool (*preciseMemExnsFn)(Int,Int) )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Bool   max_ga_known = False;
   Addr64 max_ga       = 0;

   /* One use-counter per temp in the block's type environment. */
   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            /* mga = last guest byte covered by this insn; clamp len
               to at least 1 so zero-length IMarks still cover a byte. */
            Int    len = st->Ist.IMark.len;
            Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
              (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
              by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (sidely safe @ True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state( bb, st, preciseMemExnsFn,
                                               &putRequiresPreciseMemExns);

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data. That means, in essence, that
                 a load expression cannot be substituted into a statement
                 that follows the put. But there is nothing wrong doing so
                 except when the put statement requires precise exceptions.
                 Think of a load that is moved past a put where the put
                 updates the IP in the guest state. If the load generates
                 a segfault, the wrong address (line number) would be
                 reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      /* ... and clear out the remaining slots. */
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr64)0;
}
5799
5800
5801/*---------------------------------------------------------------*/
5802/*--- iropt main                                              ---*/
5803/*---------------------------------------------------------------*/
5804
5805static Bool iropt_verbose = False; /* True; */
5806
5807
5808/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
5809   redundant Put removal, constant propagation, dead code removal,
5810   clean helper specialisation, and dead code removal (again).
5811*/
5812
5813
5814static
5815IRSB* cheap_transformations (
5816         IRSB* bb,
5817         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
5818         Bool (*preciseMemExnsFn)(Int,Int)
5819      )
5820{
5821   redundant_get_removal_BB ( bb );
5822   if (iropt_verbose) {
5823      vex_printf("\n========= REDUNDANT GET\n\n" );
5824      ppIRSB(bb);
5825   }
5826
5827   if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
5828      redundant_put_removal_BB ( bb, preciseMemExnsFn );
5829   }
5830   if (iropt_verbose) {
5831      vex_printf("\n========= REDUNDANT PUT\n\n" );
5832      ppIRSB(bb);
5833   }
5834
5835   bb = cprop_BB ( bb );
5836   if (iropt_verbose) {
5837      vex_printf("\n========= CPROPD\n\n" );
5838      ppIRSB(bb);
5839   }
5840
5841   do_deadcode_BB ( bb );
5842   if (iropt_verbose) {
5843      vex_printf("\n========= DEAD\n\n" );
5844      ppIRSB(bb);
5845   }
5846
5847   bb = spec_helpers_BB ( bb, specHelper );
5848   do_deadcode_BB ( bb );
5849   if (iropt_verbose) {
5850      vex_printf("\n========= SPECd \n\n" );
5851      ppIRSB(bb);
5852   }
5853
5854   return bb;
5855}
5856
5857
5858/* Do some more expensive transformations on bb, which are aimed at
5859   optimising as much as possible in the presence of GetI and PutI.  */
5860
static
IRSB* expensive_transformations( IRSB* bb )
{
   /* Common subexpression elimination; its did-anything-change result
      is deliberately ignored here. */
   (void)do_cse_BB( bb );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   /* Redundant-PutI elimination is only done when the register-update
      policy does not demand all registers be current at each insn. */
   if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb );
   }
   /* Sweep up dead code exposed by the passes above. */
   do_deadcode_BB( bb );
   return bb;
}
5873
5874
5875/* Scan a flattened BB to look for signs that more expensive
5876   optimisations might be useful:
5877   - find out if there are any GetIs and PutIs
5878   - find out if there are any floating or vector-typed temporaries
5879*/
5880
/* Scan a flattened bb and set the two OUT flags: whether it contains
   any GetI/PutI operations, and whether it defines any floating,
   decimal-FP or vector typed temporaries.  The vasserts double-check
   flatness: every leaf position must hold an atom. */
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            /* In a flat bb, GetI can only appear as the rhs of a
               WrTmp, so this is the only place GetI is looked for. */
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            /* expdHi/dataHi are optional (NULL for single CAS). */
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            /* storedata is NULL for a load-linked. */
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
5980
5981
5982/* ---------------- The main iropt entry point. ---------------- */
5983
5984/* exported from this file */
5985/* Rules of the game:
5986
5987   - IRExpr/IRStmt trees should be treated as immutable, as they
5988     may get shared.  So never change a field of such a tree node;
5989     instead construct and return a new one if needed.
5990*/
5991
5992
5993IRSB* do_iropt_BB(
5994         IRSB* bb0,
5995         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
5996         Bool (*preciseMemExnsFn)(Int,Int),
5997         Addr64 guest_addr,
5998         VexArch guest_arch
5999      )
6000{
6001   static Int n_total     = 0;
6002   static Int n_expensive = 0;
6003
6004   Bool hasGetIorPutI, hasVorFtemps;
6005   IRSB *bb, *bb2;
6006
6007   n_total++;
6008
6009   /* First flatten the block out, since all other
6010      phases assume flat code. */
6011
6012   bb = flatten_BB ( bb0 );
6013
6014   if (iropt_verbose) {
6015      vex_printf("\n========= FLAT\n\n" );
6016      ppIRSB(bb);
6017   }
6018
6019   /* If at level 0, stop now. */
6020   if (vex_control.iropt_level <= 0) return bb;
6021
6022   /* Now do a preliminary cleanup pass, and figure out if we also
6023      need to do 'expensive' optimisations.  Expensive optimisations
6024      are deemed necessary if the block contains any GetIs or PutIs.
6025      If needed, do expensive transformations and then another cheap
6026      cleanup pass. */
6027
6028   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
6029
6030   if (guest_arch == VexArchARM) {
6031      /* Translating Thumb2 code produces a lot of chaff.  We have to
6032         work extra hard to get rid of it. */
6033      bb = cprop_BB(bb);
6034      bb = spec_helpers_BB ( bb, specHelper );
6035      if (vex_control.iropt_register_updates < VexRegUpdAllregsAtEachInsn) {
6036         redundant_put_removal_BB ( bb, preciseMemExnsFn );
6037      }
6038      do_cse_BB( bb );
6039      do_deadcode_BB( bb );
6040   }
6041
6042   if (vex_control.iropt_level > 1) {
6043
6044      /* Peer at what we have, to decide how much more effort to throw
6045         at it. */
6046      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
6047
6048      if (hasVorFtemps && !hasGetIorPutI) {
6049         /* If any evidence of FP or Vector activity, CSE, as that
6050            tends to mop up all manner of lardy code to do with
6051            rounding modes.  Don't bother if hasGetIorPutI since that
6052            case leads into the expensive transformations, which do
6053            CSE anyway. */
6054         (void)do_cse_BB( bb );
6055         do_deadcode_BB( bb );
6056      }
6057
6058      if (hasGetIorPutI) {
6059         Bool cses;
6060         n_expensive++;
6061         if (DEBUG_IROPT)
6062            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
6063         bb = expensive_transformations( bb );
6064         bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
6065         /* Potentially common up GetIs */
6066         cses = do_cse_BB( bb );
6067         if (cses)
6068            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
6069      }
6070
6071      /* Now have a go at unrolling simple (single-BB) loops.  If
6072         successful, clean up the results as much as possible. */
6073
6074      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
6075      if (bb2) {
6076         bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
6077         if (hasGetIorPutI) {
6078            bb = expensive_transformations( bb );
6079            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
6080         } else {
6081            /* at least do CSE and dead code removal */
6082            do_cse_BB( bb );
6083            do_deadcode_BB( bb );
6084         }
6085         if (0) vex_printf("vex iropt: unrolled a loop\n");
6086      }
6087
6088   }
6089
6090   return bb;
6091}
6092
6093
6094/*---------------------------------------------------------------*/
6095/*--- end                                            ir_opt.c ---*/
6096/*---------------------------------------------------------------*/
6097