1
2/*---------------------------------------------------------------*/
3/*--- begin                                          ir_opt.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2010 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39
40#include "main_util.h"
41#include "main_globals.h"
42#include "ir_opt.h"
43
44
45/* Set to 1 for lots of debugging output. */
46#define DEBUG_IROPT 0
47
48
49/* What iropt does, 29 Dec 04.
50
51   It takes an IRSB and produces a new one with the same meaning,
52   defined thus:
53
54   After execution of the new BB, all guest state and guest memory is
55   the same as after execution of the original.  This is true
56   regardless of how the block was exited (at the end vs side exit).
57
58   In addition, parts of the guest state will be identical to that
59   created by execution of the original at the following observation
60   points:
61
62   * In a dirty helper call, any parts of the guest state that the
63     helper states that it reads or modifies will be up to date.
64     Also, guest memory will be up to date.  Parts of the guest state
65     not marked as being read or modified by the helper cannot be
66     assumed to be up-to-date at the point where the helper is called.
67
68   * Immediately prior to any load or store, those parts of the guest
69     state marked as requiring precise exceptions will be up to date.
70     Also, guest memory will be up to date.  Parts of the guest state
71     not marked as requiring precise exceptions cannot be assumed to
72     be up-to-date at the point of the load/store.
73
74   The relative order of loads and stores (including loads/stores of
75   guest memory done by dirty helpers annotated as such) is not
76   changed.  However, the relative order of loads with no intervening
77   stores/modifies may be changed.
78
79   Transformation order
80   ~~~~~~~~~~~~~~~~~~~~
81
82   There are three levels of optimisation, controlled by
83   vex_control.iropt_level.  Define first:
84
85   "Cheap transformations" are the following sequence:
86      * Redundant-Get removal
87      * Redundant-Put removal
88      * Constant propagation/folding
89      * Dead code removal
90      * Specialisation of clean helper functions
91      * Dead code removal
92
93   "Expensive transformations" are the following sequence:
94      * CSE
95      * Folding of add/sub chains
96      * Redundant-GetI removal
97      * Redundant-PutI removal
98      * Dead code removal
99
100   Then the transformations are as follows, as defined by
101   vex_control.iropt_level:
102
103   Level 0:
104      * Flatten into atomic form.
105
106   Level 1: the following sequence:
107      * Flatten into atomic form.
108      * Cheap transformations.
109
110   Level 2: the following sequence
111      * Flatten into atomic form.
112      * Cheap transformations.
113      * If block contains any floating or vector types, CSE.
114      * If block contains GetI or PutI, Expensive transformations.
115      * Try unrolling loops.  Three possible outcomes:
116        - No effect: do nothing more.
117        - Unrolled a loop, and block does not contain GetI or PutI:
118          Do: * CSE
119              * Dead code removal
120        - Unrolled a loop, and block contains GetI or PutI:
121          Do: * Expensive transformations
122              * Cheap transformations
123*/
124
125/* Implementation notes, 29 Dec 04.
126
127   TODO (important): I think rPutI removal ignores precise exceptions
128   and is therefore in a sense, wrong.  In the sense that PutIs are
129   assumed not to write parts of the guest state that we need to have
130   up-to-date at loads/stores.  So far on x86 guest that has not
131   mattered since indeed only the x87 FP registers and tags are
132   accessed using GetI/PutI, and there is no need so far for them to
133   be up to date at mem exception points.  The rPutI pass should be
134   fixed.
135
136   TODO: improve pessimistic handling of precise exceptions
137     in the tree builder.
138
139   TODO: check interaction of rGetI and dirty helpers.
140
141   F64i constants are treated differently from other constants.
142   They are not regarded as atoms, and instead lifted off and
143   bound to temps.  This allows them to participate in CSE, which
144   is important for getting good performance for x86 guest code.
145
146   CSE up F64 literals (already doing F64is)
147
148   CSE: consider carefully the requirement for precise exns
149        prior to making CSE any more aggressive.  */
150
151
152/*---------------------------------------------------------------*/
153/*--- Finite mappery, of a sort                               ---*/
154/*---------------------------------------------------------------*/
155
/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */
159
typedef
   struct {
      Bool*  inuse;  /* inuse[i]: is slot i a live binding?  Entries are
                        deleted by clearing this flag, not by compaction. */
      HWord* key;    /* key[i]: key of slot i; valid only if inuse[i] */
      HWord* val;    /* val[i]: value of slot i; valid only if inuse[i] */
      Int    size;   /* capacity of the three parallel arrays */
      Int    used;   /* number of slots handed out so far (live or dead) */
   }
   HashHW;
169
170static HashHW* newHHW ( void )
171{
172   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
173   h->size   = 8;
174   h->used   = 0;
175   h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
176   h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
177   h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
178   return h;
179}
180
181
182/* Look up key in the map. */
183
184static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
185{
186   Int i;
187   /* vex_printf("lookupHHW(%llx)\n", key ); */
188   for (i = 0; i < h->used; i++) {
189      if (h->inuse[i] && h->key[i] == key) {
190         if (val)
191            *val = h->val[i];
192         return True;
193      }
194   }
195   return False;
196}
197
198
199/* Add key->val to the map.  Replaces any existing binding for key. */
200
201static void addToHHW ( HashHW* h, HWord key, HWord val )
202{
203   Int i, j;
204   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
205
206   /* Find and replace existing binding, if any. */
207   for (i = 0; i < h->used; i++) {
208      if (h->inuse[i] && h->key[i] == key) {
209         h->val[i] = val;
210         return;
211      }
212   }
213
214   /* Ensure a space is available. */
215   if (h->used == h->size) {
216      /* Copy into arrays twice the size. */
217      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
218      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
219      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
220      for (i = j = 0; i < h->size; i++) {
221         if (!h->inuse[i]) continue;
222         inuse2[j] = True;
223         key2[j] = h->key[i];
224         val2[j] = h->val[i];
225         j++;
226      }
227      h->used = j;
228      h->size *= 2;
229      h->inuse = inuse2;
230      h->key = key2;
231      h->val = val2;
232   }
233
234   /* Finally, add it. */
235   vassert(h->used < h->size);
236   h->inuse[h->used] = True;
237   h->key[h->used] = key;
238   h->val[h->used] = val;
239   h->used++;
240}
241
242
243/*---------------------------------------------------------------*/
244/*--- Flattening out a BB into atomic SSA form                ---*/
245/*---------------------------------------------------------------*/
246
247/* Non-critical helper, heuristic for reducing the number of tmp-tmp
248   copies made by flattening.  If in doubt return False. */
249
250static Bool isFlat ( IRExpr* e )
251{
252   if (e->tag == Iex_Get)
253      return True;
254   if (e->tag == Iex_Binop)
255      return toBool( isIRAtom(e->Iex.Binop.arg1)
256                     && isIRAtom(e->Iex.Binop.arg2) );
257   if (e->tag == Iex_Load)
258      return isIRAtom(e->Iex.Load.addr);
259   return False;
260}
261
262/* Flatten out 'ex' so it is atomic, returning a new expression with
263   the same value, after having appended extra IRTemp assignments to
264   the end of 'bb'. */
265
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   /* The general scheme: recursively flatten all operands first (so
      their defining statements are appended to 'bb' before ours),
      then bind the rebuilt expression to a fresh temp and return a
      RdTmp of it.  Statement order in 'bb' therefore respects data
      dependencies. */
   switch (ex->tag) {

      case Iex_GetI:
         /* Only the index expression needs flattening; descr and bias
            are not expressions. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* Get has no subexpressions; just bind it to a temp so the
            result is an atom. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop:
         /* Flatten all four operands, then bind. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(ex->Iex.Qop.op,
                         flatten_Expr(bb, ex->Iex.Qop.arg1),
                         flatten_Expr(bb, ex->Iex.Qop.arg2),
                         flatten_Expr(bb, ex->Iex.Qop.arg3),
                         flatten_Expr(bb, ex->Iex.Qop.arg4))));
         return IRExpr_RdTmp(t1);

      case Iex_Triop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(ex->Iex.Triop.op,
                         flatten_Expr(bb, ex->Iex.Triop.arg1),
                         flatten_Expr(bb, ex->Iex.Triop.arg2),
                         flatten_Expr(bb, ex->Iex.Triop.arg3))));
         return IRExpr_RdTmp(t1);

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Shallow-copy the NULL-terminated args vector so we can
            rewrite its entries without mutating the original. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_Mux0X:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
                         flatten_Expr(bb, ex->Iex.Mux0X.expr0),
                         flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already an atom; nothing to do. */
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
373
374
375/* Append a completely flattened form of 'st' to the end of 'bb'. */
376
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d,  *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Both the index and data expressions must become atoms. */
         e1 = flatten_Expr(bb, st->Ist.PutI.ix);
         e2 = flatten_Expr(bb, st->Ist.PutI.data);
         addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
                                       e1,
                                       st->Ist.PutI.bias,
                                       e2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-width CAS; preserve
            that in the rebuilt statement. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata is NULL for a load-linked; only flatten it for
            the store-conditional form. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Copy the dirty-call descriptor so the original statement
            is left untouched, then flatten mAddr (if any), the guard,
            and every argument. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = flatten_Expr(bb, d2->args[i]);
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                           st->Ist.Exit.dst));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
475
476
477static IRSB* flatten_BB ( IRSB* in )
478{
479   Int   i;
480   IRSB* out;
481   out = emptyIRSB();
482   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
483   for (i = 0; i < in->stmts_used; i++)
484      if (in->stmts[i])
485         flatten_Stmt( out, in->stmts[i] );
486   out->next     = flatten_Expr( out, in->next );
487   out->jumpkind = in->jumpkind;
488   return out;
489}
490
491
492/*---------------------------------------------------------------*/
493/*--- In-place removal of redundant GETs                      ---*/
494/*---------------------------------------------------------------*/
495
496/* Scan forwards, building up an environment binding (min offset, max
497   offset) pairs to values, which will either be temps or constants.
498
499   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
500   env and if it matches, replace the Get with the stored value.  If
501   there is no match, add a (minoff,maxoff) :-> t binding.
502
503   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
504   any binding which fully or partially overlaps with (minoff,maxoff).
505   Then add a new (minoff,maxoff) :-> t or c binding.  */
506
507/* Extract the min/max offsets from a guest state array descriptor. */
508
509inline
510static void getArrayBounds ( IRRegArray* descr,
511                             UInt* minoff, UInt* maxoff )
512{
513   *minoff = descr->base;
514   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
515   vassert((*minoff & ~0xFFFF) == 0);
516   vassert((*maxoff & ~0xFFFF) == 0);
517   vassert(*minoff <= *maxoff);
518}
519
520/* Create keys, of the form ((minoffset << 16) | maxoffset). */
521
522static UInt mk_key_GetPut ( Int offset, IRType ty )
523{
524   /* offset should fit in 16 bits. */
525   UInt minoff = offset;
526   UInt maxoff = minoff + sizeofIRType(ty) - 1;
527   vassert((minoff & ~0xFFFF) == 0);
528   vassert((maxoff & ~0xFFFF) == 0);
529   return (minoff << 16) | maxoff;
530}
531
532static UInt mk_key_GetIPutI ( IRRegArray* descr )
533{
534   UInt minoff, maxoff;
535   getArrayBounds( descr, &minoff, &maxoff );
536   vassert((minoff & ~0xFFFF) == 0);
537   vassert((maxoff & ~0xFFFF) == 0);
538   return (minoff << 16) | maxoff;
539}
540
541/* Supposing h has keys of the form generated by mk_key_GetPut and
542   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
543   .. k_hi).
544*/
545static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
546{
547   Int  j;
548   UInt e_lo, e_hi;
549   vassert(k_lo <= k_hi);
550   /* invalidate any env entries which in any way overlap (k_lo
551      .. k_hi) */
552   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
553
554   for (j = 0; j < h->used; j++) {
555      if (!h->inuse[j])
556         continue;
557      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
558      e_hi = ((UInt)h->key[j]) & 0xFFFF;
559      vassert(e_lo <= e_hi);
560      if (e_hi < k_lo || k_hi < e_lo)
561         continue; /* no overlap possible */
562      else
563         /* overlap; invalidate */
564         h->inuse[j] = False;
565   }
566}
567
568
/* In-place forward pass over 'bb': replace each Get whose guest-state
   range is known (from an earlier Put or Get in this block) with the
   previously-bound temp or constant.  'env' maps packed
   (minoff,maxoff) keys to IRExpr* values. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      /* Note: only plain Puts are recorded; a PutI's exact range is
         not statically known, so it only invalidates (above). */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
661
662
663/*---------------------------------------------------------------*/
664/*--- In-place removal of redundant PUTs                      ---*/
665/*---------------------------------------------------------------*/
666
667/* Find any Get uses in st and invalidate any partially or fully
668   overlapping ranges listed in env.  Due to the flattening phase, the
669   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
670
/* Helper for redundant-Put removal: given a statement 'st', remove
   from 'env' any (minoff,maxoff) ranges that 'st' may read, either
   explicitly (Get/GetI) or implicitly (memory accesses, dirty calls,
   barriers).  'preciseMemExnsFn' reports whether a given guest-state
   range must be up to date at memory-exception points. */
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               /* Not a Get, but it does touch memory; handled by the
                  memRW code below. */
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;

      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      for (j = 0; j < env->used; j++) {
         if (!env->inuse[j])
            continue;
         if (vex_control.iropt_precise_memory_exns) {
            /* Precise exceptions required.  Flush all guest state. */
            env->inuse[j] = False;
         } else {
            /* Just flush the minimal amount required, as computed by
               preciseMemExnsFn. */
            HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
            HWord k_hi = env->key[j] & 0xFFFF;
            if (preciseMemExnsFn( k_lo, k_hi ))
               env->inuse[j] = False;
         }
      }
   } /* if (memRW) */

}
785
786
787/* Scan backwards, building up a set of (min offset, max
788   offset) pairs, indicating those parts of the guest state
789   for which the next event is a write.
790
791   On seeing a conditional exit, empty the set.
792
793   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
794   completely within the set, remove the Put.  Otherwise, add
795   (minoff,maxoff) to the set.
796
797   On seeing 'Get (minoff,maxoff)', remove any part of the set
798   overlapping (minoff,maxoff).  The same has to happen for any events
799   which implicitly read parts of the guest state: dirty helper calls
800   and loads/stores.
801*/
802
/* In-place backward pass over 'bb': NoOp out any Put whose target
   range is provably overwritten by a later Put before being read or
   reaching a block/side exit.  'env' holds, keyed by packed
   (minoff,maxoff), the ranges whose next event is known to be a
   write; values are unused (always 0). */
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   HashHW* env = newHHW();
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         /* Since control may not get beyond this point, we must empty
            out the set, since we can no longer claim that the next
            event for any part of the guest state is definitely a
            write. */
         vassert(isIRAtom(st->Ist.Exit.guard));
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
            vassert(isIRAtom(st->Ist.PutI.ix));
            vassert(isIRAtom(st->Ist.PutI.data));
            break;
         default:
            isPut = False;
      }
      /* Only plain Puts are candidates for removal; a PutI's exact
         target within its array is unknown, so it falls through to
         handle_gets_Stmt below (where it is treated as boring). */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}
879
880
881/*---------------------------------------------------------------*/
882/*--- Constant propagation and folding                        ---*/
883/*---------------------------------------------------------------*/
884
885/* The env in this section is a map from IRTemp to IRExpr*,
886   that is, an array indexed by IRTemp. */
887
888/* Are both expressions simply the same IRTemp ? */
889static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
890{
891   return toBool( e1->tag == Iex_RdTmp
892                  && e2->tag == Iex_RdTmp
893                  && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
894}
895
896static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
897{
898   return toBool( e1->tag == Iex_Const
899                  && e2->tag == Iex_Const
900                  && e1->Iex.Const.con->tag == Ico_U32
901                  && e2->Iex.Const.con->tag == Ico_U32
902                  && e1->Iex.Const.con->Ico.U32
903                     == e2->Iex.Const.con->Ico.U32 );
904}
905
906/* Are both expressions either the same IRTemp or IRConst-U32s ?  If
907   in doubt, say No. */
908static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
909{
910   switch (e1->tag) {
911      case Iex_RdTmp:
912         return sameIRTemps(e1, e2);
913      case Iex_Const:
914         return sameIcoU32s(e1, e2);
915      default:
916         return False;
917   }
918}
919
920static Bool notBool ( Bool b )
921{
922   if (b == True) return False;
923   if (b == False) return True;
924   vpanic("notBool");
925}
926
927/* Make a zero which has the same type as the result of the given
928   primop. */
929static IRExpr* mkZeroOfPrimopResultType ( IROp op )
930{
931   switch (op) {
932      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
933      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
934      case Iop_Sub32:
935      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
936      case Iop_Sub64:
937      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
938      case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
939      default: vpanic("mkZeroOfPrimopResultType: bad primop");
940   }
941}
942
943/* Make a value containing all 1-bits, which has the same type as the
944   result of the given primop. */
945static IRExpr* mkOnesOfPrimopResultType ( IROp op )
946{
947   switch (op) {
948      case Iop_CmpEQ64:
949         return IRExpr_Const(IRConst_U1(toBool(1)));
950      case Iop_CmpEQ8x8:
951         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
952      case Iop_CmpEQ8x16:
953         return IRExpr_Const(IRConst_V128(0xFFFF));
954      default:
955         vpanic("mkOnesOfPrimopResultType: bad primop");
956   }
957}
958
959
/* Try to fold expression 'e' to a constant or to a simpler equivalent
   expression.  'e' is assumed to be flat (arguments are atoms), per
   the earlier flattening pass.  Returns the folded expression, or 'e'
   itself (pointer-equal) when no folding rule applies.  Handles:
   unary ops on constants, binary ops on constant pairs, identity
   simplifications (x op neutral-element, op(t,t)), and Mux0X with a
   constant or degenerate discriminant. */
static IRExpr* fold_Expr ( IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   /* UNARY ops */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.arg->tag == Iex_Const) {
      switch (e->Iex.Unop.op) {
         /* Widening 1-bit values: unsigned gives 0/1, signed gives
            0/all-ones. */
         case Iop_1Uto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0)));
            break;
         case Iop_1Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;
         case Iop_1Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;

         case Iop_1Sto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFF : 0)));
            break;
         case Iop_1Sto16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFF : 0)));
            break;
         case Iop_1Sto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFF : 0));
            break;
         case Iop_1Sto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
            break;

         /* Sign-extend U8 to 32 bits via shift-up/shift-down. */
         case Iop_8Sto32: {
            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            s32 <<= 24;
            s32 >>= 24;
            e2 = IRExpr_Const(IRConst_U32((UInt)s32));
            break;
         }
         case Iop_8Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_8Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_32to16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         /* Narrowing to 1 bit keeps just the least significant bit. */
         case Iop_32to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_64to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                 )));
            break;

         case Iop_Not64:
            e2 = IRExpr_Const(IRConst_U64(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
            break;
         case Iop_Not32:
            e2 = IRExpr_Const(IRConst_U32(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_Not16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
            break;
         case Iop_Not8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
            break;

         case Iop_Not1:
            e2 = IRExpr_Const(IRConst_U1(
                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
            break;

         case Iop_64to8: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFULL;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
            break;
         }
         case Iop_64to16: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFFFULL;
            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
            break;
         }
         case Iop_64to32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0x00000000FFFFFFFFULL;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_64HIto32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 >>= 32;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_32Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFFFFFULL
                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
            break;
         /* Sign-extend U32 to 64 bits via shift-up/shift-down. */
         case Iop_32Sto64: {
            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            s64 <<= 32;
            s64 >>= 32;
            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
            break;
         }
         /* CmpNEZ: compare against zero, producing a 1-bit result. */
         case Iop_CmpNEZ8:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                 )));
            break;
         case Iop_CmpNEZ32:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_CmpNEZ64:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                 )));
            break;

         /* CmpwNEZ: word-wide compare against zero, producing 0 or
            all-ones at the operand's width. */
         case Iop_CmpwNEZ32: {
            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (w32 == 0)
               e2 = IRExpr_Const(IRConst_U32( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
            break;
         }
         case Iop_CmpwNEZ64: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (w64 == 0)
               e2 = IRExpr_Const(IRConst_U64( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
            break;
         }

         /* Left32/Left64 compute x | -x: every bit from the lowest
            set bit upwards becomes 1. */
         case Iop_Left32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
            s32 = (s32 | (-s32));
            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
            break;
         }

         case Iop_Left64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            Long  s64 = (Long)u64;
            s64 = (s64 | (-s64));
            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
            break;
         }

         default:
            goto unhandled;
      }
   }

   /* BINARY ops */
   if (e->tag == Iex_Binop) {
      if (e->Iex.Binop.arg1->tag == Iex_Const
          && e->Iex.Binop.arg2->tag == Iex_Const) {
         /* cases where both args are consts */
         switch (e->Iex.Binop.op) {

            /* -- Or -- */
            case Iop_Or8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Or16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Or32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Or64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Xor -- */
            case Iop_Xor8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Xor16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Xor32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Xor64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- And -- */
            case Iop_And8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_And32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_And64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Add -- */
            case Iop_Add8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Add32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Add64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Sub -- */
            case Iop_Sub8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Sub32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Sub64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Max32U -- */
            case Iop_Max32U: {
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               UInt res  = u32a > u32b ? u32a : u32b;
               e2 = IRExpr_Const(IRConst_U32(res));
               break;
            }

            /* -- Mul -- */
            case Iop_Mul32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Mul64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* Signed 32x32 -> 64 widening multiply; done via explicit
               sign-extension to 64 bits of each operand first. */
            case Iop_MullS32: {
               /* very paranoid */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Long  s64a = (Long)s32a;
               Long  s64b = (Long)s32b;
               Long  sres = s64a * s64b;
               ULong ures = (ULong)sres;
               e2 = IRExpr_Const(IRConst_U64(ures));
               break;
            }

            /* -- Shl -- */
            /* Shift amounts are checked against the operand width;
               out-of-range amounts are simply not folded. */
            case Iop_Shl32:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31)
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           << shift)));
               break;
            case Iop_Shl64:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63)
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           << shift)));
               break;

            /* -- Sar -- */
            case Iop_Sar32: {
               /* paranoid ... */
               /*signed*/ Int s32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  s32 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
               }
               break;
            }
            case Iop_Sar64: {
               /* paranoid ... */
               /*signed*/ Long s64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  s64 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
               }
               break;
            }

            /* -- Shr -- */
            case Iop_Shr32: {
               /* paranoid ... */
               /*unsigned*/ UInt u32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  u32 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U32(u32));
               }
               break;
            }
            case Iop_Shr64: {
               /* paranoid ... */
               /*unsigned*/ ULong u64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  u64 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U64(u64));
               }
               break;
            }

            /* -- CmpEQ -- */
            case Iop_CmpEQ32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpEQ64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpNE -- */
            case Iop_CmpNE8:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
               break;
            case Iop_CmpNE32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpNE64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpLEU -- */
            case Iop_CmpLE32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLES -- */
            case Iop_CmpLE32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLTS -- */
            case Iop_CmpLT32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLTU -- */
            case Iop_CmpLT32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpORD -- */
            case Iop_CmpORD32S: {
               /* very paranoid */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Int   r = 0x2; /* EQ */
               if (s32a < s32b) {
                  r = 0x8; /* LT */
               }
               else if (s32a > s32b) {
                  r = 0x4; /* GT */
               }
               e2 = IRExpr_Const(IRConst_U32(r));
               break;
            }

            /* -- nHLto2n -- */
            case Iop_32HLto64:
               e2 = IRExpr_Const(IRConst_U64(
                       (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
                    ));
               break;
            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
               break;

            default:
               goto unhandled;
         }

      } else {

         /* other cases (identities, etc) */

         /* Shl64/Shr64(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Shl32/Shr32(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or8(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or8)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or16(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or16)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or32/Add32/Max32U(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Add32
              || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Add32(t,t) ==> t << 1.  Memcheck doesn't understand that
            x+x produces a defined least significant bit, and it seems
            simplest just to get rid of the problem by rewriting it
            out, since the opportunity to do so exists. */
         if (e->Iex.Binop.op == Iop_Add32
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl32,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add64(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add64
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl64,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add8(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add8
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl8,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else
         /* NB no Add16(t,t) case yet as no known test case exists */

         /* Or64/Add64(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* And32(x,0xFFFFFFFF) ==> x */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* And32(x,0) ==> 0 */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* And32/Shl32(0,x) ==> 0 */
         if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* Or8(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Or8
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or32/Max32U(0,x) ==> x */
         if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or64(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Or64
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
         /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
         /* Max32U(t,t) ==> t, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_And64: case Iop_And32:
            case Iop_And16: case Iop_And8:
            case Iop_Or64: case Iop_Or32:
            case Iop_Or16: case Iop_Or8: case Iop_OrV128:
            case Iop_Max32U:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = e->Iex.Binop.arg1;
               break;
            default:
               break;
         }

         /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
         /* Sub32/64(t,t) ==> 0, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_Xor64: case Iop_Xor32:
            case Iop_Xor16: case Iop_Xor8:
            case Iop_XorV128:
            case Iop_Sub64: case Iop_Sub32:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

         /* CmpEQ(t,t) ==> all-ones, for the equality-style ops below. */
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ64:
            case Iop_CmpEQ8x8:
            case Iop_CmpEQ8x16:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

      }
   }

   /* Mux0X */
   if (e->tag == Iex_Mux0X) {
      /* is the discriminant a constant? */
      if (e->Iex.Mux0X.cond->tag == Iex_Const) {
         Bool zero;
         /* assured us by the IR type rules */
         vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
         zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
                                     ->Iex.Const.con->Ico.U8));
         e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
      }
      else
      /* are the arms identical? (pretty weedy test) */
      if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
                               e->Iex.Mux0X.exprX)) {
         e2 = e->Iex.Mux0X.expr0;
      }
   }

   /* Show cases where we've found but not folded 'op(t,t)'. */
   if (0 && e == e2 && e->tag == Iex_Binop
       && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
      vex_printf("IDENT: ");
      ppIRExpr(e); vex_printf("\n");
   }

   /* Show the overall results of folding. */
   if (DEBUG_IROPT && e2 != e) {
      vex_printf("FOLD: ");
      ppIRExpr(e); vex_printf("  ->  ");
      ppIRExpr(e2); vex_printf("\n");
   }

   return e2;

   /* No rule matched: optionally report, and return 'e' unchanged. */
 unhandled:
#  if 0
   vex_printf("\n\n");
   ppIRExpr(e);
   vpanic("fold_Expr: no rule for the above");
#  else
   if (vex_control.iropt_verbosity > 0) {
      vex_printf("vex iropt: fold_Expr: no rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }
   return e2;
#  endif
}
1681
1682
1683/* Apply the subst to a simple 1-level expression -- guaranteed to be
1684   1-level due to previous flattening pass. */
1685
1686static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
1687{
1688   switch (ex->tag) {
1689      case Iex_RdTmp:
1690         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
1691            return env[(Int)ex->Iex.RdTmp.tmp];
1692         } else {
1693            /* not bound in env */
1694            return ex;
1695         }
1696
1697      case Iex_Const:
1698      case Iex_Get:
1699         return ex;
1700
1701      case Iex_GetI:
1702         vassert(isIRAtom(ex->Iex.GetI.ix));
1703         return IRExpr_GetI(
1704            ex->Iex.GetI.descr,
1705            subst_Expr(env, ex->Iex.GetI.ix),
1706            ex->Iex.GetI.bias
1707         );
1708
1709      case Iex_Qop:
1710         vassert(isIRAtom(ex->Iex.Qop.arg1));
1711         vassert(isIRAtom(ex->Iex.Qop.arg2));
1712         vassert(isIRAtom(ex->Iex.Qop.arg3));
1713         vassert(isIRAtom(ex->Iex.Qop.arg4));
1714         return IRExpr_Qop(
1715                   ex->Iex.Qop.op,
1716                   subst_Expr(env, ex->Iex.Qop.arg1),
1717                   subst_Expr(env, ex->Iex.Qop.arg2),
1718                   subst_Expr(env, ex->Iex.Qop.arg3),
1719                   subst_Expr(env, ex->Iex.Qop.arg4)
1720                );
1721
1722      case Iex_Triop:
1723         vassert(isIRAtom(ex->Iex.Triop.arg1));
1724         vassert(isIRAtom(ex->Iex.Triop.arg2));
1725         vassert(isIRAtom(ex->Iex.Triop.arg3));
1726         return IRExpr_Triop(
1727                   ex->Iex.Triop.op,
1728                   subst_Expr(env, ex->Iex.Triop.arg1),
1729                   subst_Expr(env, ex->Iex.Triop.arg2),
1730                   subst_Expr(env, ex->Iex.Triop.arg3)
1731                );
1732
1733      case Iex_Binop:
1734         vassert(isIRAtom(ex->Iex.Binop.arg1));
1735         vassert(isIRAtom(ex->Iex.Binop.arg2));
1736         return IRExpr_Binop(
1737                   ex->Iex.Binop.op,
1738                   subst_Expr(env, ex->Iex.Binop.arg1),
1739                   subst_Expr(env, ex->Iex.Binop.arg2)
1740                );
1741
1742      case Iex_Unop:
1743         vassert(isIRAtom(ex->Iex.Unop.arg));
1744         return IRExpr_Unop(
1745                   ex->Iex.Unop.op,
1746                   subst_Expr(env, ex->Iex.Unop.arg)
1747                );
1748
1749      case Iex_Load:
1750         vassert(isIRAtom(ex->Iex.Load.addr));
1751         return IRExpr_Load(
1752                   ex->Iex.Load.end,
1753                   ex->Iex.Load.ty,
1754                   subst_Expr(env, ex->Iex.Load.addr)
1755                );
1756
1757      case Iex_CCall: {
1758         Int      i;
1759         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
1760         for (i = 0; args2[i]; i++) {
1761            vassert(isIRAtom(args2[i]));
1762            args2[i] = subst_Expr(env, args2[i]);
1763         }
1764         return IRExpr_CCall(
1765                   ex->Iex.CCall.cee,
1766                   ex->Iex.CCall.retty,
1767                   args2
1768                );
1769      }
1770
1771      case Iex_Mux0X:
1772         vassert(isIRAtom(ex->Iex.Mux0X.cond));
1773         vassert(isIRAtom(ex->Iex.Mux0X.expr0));
1774         vassert(isIRAtom(ex->Iex.Mux0X.exprX));
1775         return IRExpr_Mux0X(
1776                   subst_Expr(env, ex->Iex.Mux0X.cond),
1777                   subst_Expr(env, ex->Iex.Mux0X.expr0),
1778                   subst_Expr(env, ex->Iex.Mux0X.exprX)
1779                );
1780
1781      default:
1782         vex_printf("\n\n"); ppIRExpr(ex);
1783         vpanic("subst_Expr");
1784
1785   }
1786}
1787
1788
1789/* Apply the subst to stmt, then fold the result as much as possible.
1790   Much simplified due to stmt being previously flattened.  As a
1791   result of this, the stmt may wind up being turned into a no-op.
1792*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
   /* Substitute 'env' into each atom of 'st', then constant-fold the
      resulting expressions.  Builds a new statement; 'st' itself is
      not modified.  May return IRStmt_NoOp in place of an Ist_Exit
      whose guard folds to a constant False. */
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         return IRStmt_PutI(
                   st->Ist.PutI.descr,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
                   st->Ist.PutI.bias,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.data))
                );

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         /* The Hi halves (expdHi/dataHi) may legitimately be NULL;
            in that case they are preserved as NULL below. */
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         /* storedata may be NULL, in which case there is nothing to
            substitute into and NULL is passed through unchanged. */
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         /* Duplicate the IRDirty and its args vector before updating
            fields, so the original statement is left intact. */
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         /* mAddr is only inspected when there is a memory effect. */
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            vassert(isIRAtom(d2->args[i]));
            d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            vassert(fcond->Iex.Const.con->Ico.U1 == False
                    || fcond->Iex.Const.con->Ico.U1 == True);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
1938
1939
1940IRSB* cprop_BB ( IRSB* in )
1941{
1942   Int      i;
1943   IRSB*    out;
1944   IRStmt*  st2;
1945   Int      n_tmps = in->tyenv->types_used;
1946   IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
1947
1948   out = emptyIRSB();
1949   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
1950
1951   /* Set up the env with which travels forward.  This holds a
1952      substitution, mapping IRTemps to atoms, that is, IRExprs which
1953      are either IRTemps or IRConsts.  Thus, copy and constant
1954      propagation is done.  The environment is to be applied as we
1955      move along.  Keys are IRTemps.  Values are IRExpr*s.
1956   */
1957   for (i = 0; i < n_tmps; i++)
1958      env[i] = NULL;
1959
1960   /* For each original SSA-form stmt ... */
1961   for (i = 0; i < in->stmts_used; i++) {
1962
1963      /* First apply the substitution to the current stmt.  This
1964         propagates in any constants and tmp-tmp assignments
1965         accumulated prior to this point.  As part of the subst_Stmt
1966         call, also then fold any constant expressions resulting. */
1967
1968      st2 = in->stmts[i];
1969
1970      /* perhaps st2 is already a no-op? */
1971      if (st2->tag == Ist_NoOp) continue;
1972
1973      st2 = subst_and_fold_Stmt( env, st2 );
1974
1975      /* If the statement has been folded into a no-op, forget it. */
1976      if (st2->tag == Ist_NoOp) continue;
1977
1978      /* Now consider what the stmt looks like.  If it's of the form
1979         't = const' or 't1 = t2', add it to the running environment
1980         and not to the output BB.  Otherwise, add it to the output
1981         BB.  Note, we choose not to propagate const when const is an
1982         F64i, so that F64i literals can be CSE'd later.  This helps
1983         x86 floating point code generation. */
1984
1985      if (st2->tag == Ist_WrTmp
1986          && st2->Ist.WrTmp.data->tag == Iex_Const
1987          && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
1988         /* 't = const' -- add to env.
1989             The pair (IRTemp, IRExpr*) is added. */
1990         env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
1991      }
1992      else
1993      if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
1994         /* 't1 = t2' -- add to env.
1995             The pair (IRTemp, IRExpr*) is added. */
1996         env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
1997      }
1998      else {
1999         /* Not interesting, copy st2 into the output block. */
2000         addStmtToIRSB( out, st2 );
2001      }
2002   }
2003
2004   out->next     = subst_Expr( env, in->next );
2005   out->jumpkind = in->jumpkind;
2006   return out;
2007}
2008
2009
2010/*---------------------------------------------------------------*/
2011/*--- Dead code (t = E) removal                               ---*/
2012/*---------------------------------------------------------------*/
2013
2014/* As a side effect, also removes all code following an unconditional
2015   side exit. */
2016
/* The type of the HashHW map is: a map from IRTemp to nothing
   -- really just operating as a set of IRTemps.
*/
2020
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   /* Mark 'tmp' as used, in the use-set 'set' (indexed by temp number). */
   set[(Int)tmp] = True;
}
2026
2027static void addUses_Expr ( Bool* set, IRExpr* e )
2028{
2029   Int i;
2030   switch (e->tag) {
2031      case Iex_GetI:
2032         addUses_Expr(set, e->Iex.GetI.ix);
2033         return;
2034      case Iex_Mux0X:
2035         addUses_Expr(set, e->Iex.Mux0X.cond);
2036         addUses_Expr(set, e->Iex.Mux0X.expr0);
2037         addUses_Expr(set, e->Iex.Mux0X.exprX);
2038         return;
2039      case Iex_CCall:
2040         for (i = 0; e->Iex.CCall.args[i]; i++)
2041            addUses_Expr(set, e->Iex.CCall.args[i]);
2042         return;
2043      case Iex_Load:
2044         addUses_Expr(set, e->Iex.Load.addr);
2045         return;
2046      case Iex_Qop:
2047         addUses_Expr(set, e->Iex.Qop.arg1);
2048         addUses_Expr(set, e->Iex.Qop.arg2);
2049         addUses_Expr(set, e->Iex.Qop.arg3);
2050         addUses_Expr(set, e->Iex.Qop.arg4);
2051         return;
2052      case Iex_Triop:
2053         addUses_Expr(set, e->Iex.Triop.arg1);
2054         addUses_Expr(set, e->Iex.Triop.arg2);
2055         addUses_Expr(set, e->Iex.Triop.arg3);
2056         return;
2057      case Iex_Binop:
2058         addUses_Expr(set, e->Iex.Binop.arg1);
2059         addUses_Expr(set, e->Iex.Binop.arg2);
2060         return;
2061      case Iex_Unop:
2062         addUses_Expr(set, e->Iex.Unop.arg);
2063         return;
2064      case Iex_RdTmp:
2065         addUses_Temp(set, e->Iex.RdTmp.tmp);
2066         return;
2067      case Iex_Const:
2068      case Iex_Get:
2069         return;
2070      default:
2071         vex_printf("\n");
2072         ppIRExpr(e);
2073         vpanic("addUses_Expr");
2074   }
2075}
2076
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   /* Mark in 'set' every IRTemp read by 'st', by walking each of its
      expression operands with addUses_Expr. */
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         return;
      case Ist_PutI:
         addUses_Expr(set, st->Ist.PutI.ix);
         addUses_Expr(set, st->Ist.PutI.data);
         return;
      case Ist_WrTmp:
         addUses_Expr(set, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         return;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         return;
      case Ist_CAS:
         /* The Hi halves may be NULL; only visit them when present. */
         cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         return;
      case Ist_LLSC:
         addUses_Expr(set, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         /* mAddr is only meaningful when the call has a memory effect. */
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (i = 0; d->args[i] != NULL; i++)
            addUses_Expr(set, d->args[i]);
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These carry no expression operands. */
         return;
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}
2137
2138
2139/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
2140static Bool isZeroU1 ( IRExpr* e )
2141{
2142   return toBool( e->tag == Iex_Const
2143                  && e->Iex.Const.con->tag == Ico_U1
2144                  && e->Iex.Const.con->Ico.U1 == False );
2145}
2146
2147/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
2148static Bool isOneU1 ( IRExpr* e )
2149{
2150   return toBool( e->tag == Iex_Const
2151                  && e->Iex.Const.con->tag == Ico_U1
2152                  && e->Iex.Const.con->Ico.U1 == True );
2153}
2154
2155
2156/* Note, this destructively modifies the given IRSB. */
2157
2158/* Scan backwards through statements, carrying a set of IRTemps which
2159   are known to be used after the current point.  On encountering 't =
2160   E', delete the binding if it is not used.  Otherwise, add any temp
2161   uses to the set and keep on moving backwards.
2162
2163   As an enhancement, the first (backwards) pass searches for IR exits
2164   with always-taken conditions and notes the location of the earliest
2165   one in the block.  If any such are found, a second pass copies the
2166   exit destination and jump kind to the bb-end.  Then, the exit and
2167   all statements following it are turned into no-ops.
2168*/
2169
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   IRStmt* st;

   /* 'set' is the used-later flag for each temp; nothing used yet. */
   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;   /* -1 == no always-taken exit seen */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits */
      /* (the backwards scan keeps overwriting this, so the value left
         at the end is the EARLIEST such exit in the block) */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Redirect the block end to the exit's destination ... */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      /* ... and turn the exit and everything after it into no-ops. */
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
2235
2236
2237/*---------------------------------------------------------------*/
2238/*--- Specialisation of helper function calls, in             ---*/
2239/*--- collaboration with the front end                        ---*/
2240/*---------------------------------------------------------------*/
2241
2242static
2243IRSB* spec_helpers_BB(
2244         IRSB* bb,
2245         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
2246      )
2247{
2248   Int     i;
2249   IRStmt* st;
2250   IRExpr* ex;
2251   Bool    any = False;
2252
2253   for (i = bb->stmts_used-1; i >= 0; i--) {
2254      st = bb->stmts[i];
2255
2256      if (st->tag != Ist_WrTmp
2257          || st->Ist.WrTmp.data->tag != Iex_CCall)
2258         continue;
2259
2260      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
2261                          st->Ist.WrTmp.data->Iex.CCall.args,
2262                          &bb->stmts[0], i );
2263      if (!ex)
2264        /* the front end can't think of a suitable replacement */
2265        continue;
2266
2267      /* We got something better.  Install it in the bb. */
2268      any = True;
2269      bb->stmts[i]
2270         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
2271
2272      if (0) {
2273         vex_printf("SPEC: ");
2274         ppIRExpr(st->Ist.WrTmp.data);
2275         vex_printf("  -->  ");
2276         ppIRExpr(ex);
2277         vex_printf("\n");
2278      }
2279   }
2280
2281   if (any)
2282      bb = flatten_BB(bb);
2283   return bb;
2284}
2285
2286
2287/*---------------------------------------------------------------*/
2288/*--- Determination of guest state aliasing relationships     ---*/
2289/*---------------------------------------------------------------*/
2290
2291/* These are helper functions for CSE and GetI/PutI transformations.
2292
2293   Determine, to the extent possible, the relationship between two
2294   guest state accesses.  The possible outcomes are:
2295
2296   * Exact alias.  These two accesses denote precisely the same
2297     piece of the guest state.
2298
2299   * Definitely no alias.  These two accesses are guaranteed not to
2300     overlap any part of the guest state.
2301
2302   * Unknown -- if neither of the above can be established.
2303
2304   If in doubt, return Unknown.  */
2305
/* Three-valued aliasing verdict; see discussion above. */
typedef
   enum { ExactAlias, NoAlias, UnknownAlias }
   GSAliasing;
2309
2310
2311/* Produces the alias relation between an indexed guest
2312   state access and a non-indexed access. */
2313
2314static
2315GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2316                                    Int offset2, IRType ty2 )
2317{
2318   UInt minoff1, maxoff1, minoff2, maxoff2;
2319
2320   getArrayBounds( descr1, &minoff1, &maxoff1 );
2321   minoff2 = offset2;
2322   maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2323
2324   if (maxoff1 < minoff2 || maxoff2 < minoff1)
2325      return NoAlias;
2326
2327   /* Could probably do better here if required.  For the moment
2328      however just claim not to know anything more. */
2329   return UnknownAlias;
2330}
2331
2332
2333/* Produces the alias relation between two indexed guest state
2334   accesses. */
2335
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   /* Shift both biases into non-negative territory by adding
      multiples of nElems; panic if that takes implausibly many
      rounds. */
   iters = 0;
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
2398
2399
2400/*---------------------------------------------------------------*/
2401/*--- Common Subexpression Elimination                        ---*/
2402/*---------------------------------------------------------------*/
2403
2404/* Expensive in time and space. */
2405
2406/* Uses two environments:
2407   a IRTemp -> IRTemp mapping
2408   a mapping from AvailExpr* to IRTemp
2409*/
2410
/* Packed representation of the expression shapes the CSE pass can
   track.  The letters in each tag name the operand kinds:
   t = IRTemp, c = IRConst (so e.g. Btc = binop(tmp, const)). */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) -- the const is stored by value */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) -- the const is stored by value */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* Mux0X(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e0;
            IRTemp eX;
         } Mttt;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
      } u;
   }
   AvailExpr;
2457
2458static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
2459{
2460   if (a1->tag != a2->tag)
2461      return False;
2462   switch (a1->tag) {
2463      case Ut:
2464         return toBool(
2465                a1->u.Ut.op == a2->u.Ut.op
2466                && a1->u.Ut.arg == a2->u.Ut.arg);
2467      case Btt:
2468         return toBool(
2469                a1->u.Btt.op == a2->u.Btt.op
2470                && a1->u.Btt.arg1 == a2->u.Btt.arg1
2471                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
2472      case Btc:
2473         return toBool(
2474                a1->u.Btc.op == a2->u.Btc.op
2475                && a1->u.Btc.arg1 == a2->u.Btc.arg1
2476                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
2477      case Bct:
2478         return toBool(
2479                a1->u.Bct.op == a2->u.Bct.op
2480                && a1->u.Bct.arg2 == a2->u.Bct.arg2
2481                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
2482      case Cf64i:
2483         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
2484      case Mttt:
2485         return toBool(a1->u.Mttt.co == a2->u.Mttt.co
2486                       && a1->u.Mttt.e0 == a2->u.Mttt.e0
2487                       && a1->u.Mttt.eX == a2->u.Mttt.eX);
2488      case GetIt:
2489         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
2490                       && a1->u.GetIt.ix == a2->u.GetIt.ix
2491                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
2492      default: vpanic("eq_AvailExpr");
2493   }
2494}
2495
/* Reconstruct a genuine IRExpr tree from the packed AvailExpr form. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst* con;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         /* The constant is held by value inside the AvailExpr, but
            IRExpr_Const takes a pointer, so copy it to fresh heap
            storage first. */
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         /* Same heap-copy treatment as Btc. */
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Mttt:
         return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
                             IRExpr_RdTmp(ae->u.Mttt.e0),
                             IRExpr_RdTmp(ae->u.Mttt.eX));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
2532
2533inline
2534static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
2535{
2536   HWord res;
2537   /* env :: IRTemp -> IRTemp */
2538   if (lookupHHW( env, &res, (HWord)tmp ))
2539      return (IRTemp)res;
2540   else
2541      return tmp;
2542}
2543
2544static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
2545{
2546   /* env :: IRTemp -> IRTemp */
2547   switch (ae->tag) {
2548      case Ut:
2549         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
2550         break;
2551      case Btt:
2552         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
2553         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
2554         break;
2555      case Btc:
2556         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
2557         break;
2558      case Bct:
2559         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
2560         break;
2561      case Cf64i:
2562         break;
2563      case Mttt:
2564         ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
2565         ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
2566         ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
2567         break;
2568      case GetIt:
2569         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
2570         break;
2571      default:
2572         vpanic("subst_AvailExpr");
2573   }
2574}
2575
2576static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
2577{
2578   AvailExpr* ae;
2579
2580   if (e->tag == Iex_Unop
2581       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
2582      ae = LibVEX_Alloc(sizeof(AvailExpr));
2583      ae->tag      = Ut;
2584      ae->u.Ut.op  = e->Iex.Unop.op;
2585      ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
2586      return ae;
2587   }
2588
2589   if (e->tag == Iex_Binop
2590       && e->Iex.Binop.arg1->tag == Iex_RdTmp
2591       && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2592      ae = LibVEX_Alloc(sizeof(AvailExpr));
2593      ae->tag        = Btt;
2594      ae->u.Btt.op   = e->Iex.Binop.op;
2595      ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2596      ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2597      return ae;
2598   }
2599
2600   if (e->tag == Iex_Binop
2601      && e->Iex.Binop.arg1->tag == Iex_RdTmp
2602      && e->Iex.Binop.arg2->tag == Iex_Const) {
2603      ae = LibVEX_Alloc(sizeof(AvailExpr));
2604      ae->tag        = Btc;
2605      ae->u.Btc.op   = e->Iex.Binop.op;
2606      ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2607      ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
2608      return ae;
2609   }
2610
2611   if (e->tag == Iex_Binop
2612      && e->Iex.Binop.arg1->tag == Iex_Const
2613      && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2614      ae = LibVEX_Alloc(sizeof(AvailExpr));
2615      ae->tag        = Bct;
2616      ae->u.Bct.op   = e->Iex.Binop.op;
2617      ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2618      ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
2619      return ae;
2620   }
2621
2622   if (e->tag == Iex_Const
2623       && e->Iex.Const.con->tag == Ico_F64i) {
2624      ae = LibVEX_Alloc(sizeof(AvailExpr));
2625      ae->tag          = Cf64i;
2626      ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
2627      return ae;
2628   }
2629
2630   if (e->tag == Iex_Mux0X
2631       && e->Iex.Mux0X.cond->tag == Iex_RdTmp
2632       && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
2633       && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
2634      ae = LibVEX_Alloc(sizeof(AvailExpr));
2635      ae->tag       = Mttt;
2636      ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
2637      ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
2638      ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
2639      return ae;
2640   }
2641
2642   if (e->tag == Iex_GetI
2643       && e->Iex.GetI.ix->tag == Iex_RdTmp) {
2644      ae = LibVEX_Alloc(sizeof(AvailExpr));
2645      ae->tag           = GetIt;
2646      ae->u.GetIt.descr = e->Iex.GetI.descr;
2647      ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
2648      ae->u.GetIt.bias  = e->Iex.GetI.bias;
2649      return ae;
2650   }
2651
2652   return NULL;
2653}
2654
2655
2656/* The BB is modified in-place.  Returns True if any changes were
2657   made. */
2658
/* Common-subexpression elimination over one IRSB.  For each stmt
   "t = E" where E has an AvailExpr form, either re-point t at an
   earlier identical computation, or record E as newly available.
   GetI-form bindings are invalidated by statements that may write
   guest state.  Returns True iff any stmt was replaced. */
static Bool do_cse_BB ( IRSB* bb )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;   /* did we change anything? */

   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* IRTemps are stored directly in HWord-sized hash keys/values. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI kind.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            /* only GetI bindings can be hurt by guest-state writes */
            if (ae->tag != GetIt)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               /* Dirty/Store/MBE/CAS/LLSC: assume the worst. */
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         st->Ist.PutI.descr,
                         st->Ist.PutI.ix,
                         st->Ist.PutI.bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
2807
2808
2809/*---------------------------------------------------------------*/
2810/*--- Add32/Sub32 chain collapsing                            ---*/
2811/*---------------------------------------------------------------*/
2812
2813/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2814
2815/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
2816   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
2817   root node is Add32, not Sub32. */
2818
2819static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2820{
2821   if (e->tag != Iex_Binop)
2822      return False;
2823   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2824      return False;
2825   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
2826      return False;
2827   if (e->Iex.Binop.arg2->tag != Iex_Const)
2828      return False;
2829   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2830   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2831   if (e->Iex.Binop.op == Iop_Sub32)
2832      *i32 = -*i32;
2833   return True;
2834}
2835
2836
2837/* Figure out if tmp can be expressed as tmp2 +32 const, for some
2838   other tmp2.  Scan backwards from the specified start point -- an
2839   optimisation. */
2840
2841static Bool collapseChain ( IRSB* bb, Int startHere,
2842                            IRTemp tmp,
2843                            IRTemp* tmp2, Int* i32 )
2844{
2845   Int     j, ii;
2846   IRTemp  vv;
2847   IRStmt* st;
2848   IRExpr* e;
2849
2850   /* the (var, con) pair contain the current 'representation' for
2851      'tmp'.  We start with 'tmp + 0'.  */
2852   IRTemp var = tmp;
2853   Int    con = 0;
2854
2855   /* Scan backwards to see if tmp can be replaced by some other tmp
2856     +/- a constant. */
2857   for (j = startHere; j >= 0; j--) {
2858      st = bb->stmts[j];
2859      if (st->tag != Ist_WrTmp)
2860         continue;
2861      if (st->Ist.WrTmp.tmp != var)
2862         continue;
2863      e = st->Ist.WrTmp.data;
2864      if (!isAdd32OrSub32(e, &vv, &ii))
2865         break;
2866      var = vv;
2867      con += ii;
2868   }
2869   if (j == -1)
2870      /* no earlier binding for var .. ill-formed IR */
2871      vpanic("collapseChain");
2872
2873   /* so, did we find anything interesting? */
2874   if (var == tmp)
2875      return False; /* no .. */
2876
2877   *tmp2 = var;
2878   *i32  = con;
2879   return True;
2880}
2881
2882
2883/* ------- Main function for Add32/Sub32 chain collapsing ------ */
2884
/* Walk the block last-to-first, rewriting three statement shapes so
   that chains of Add32/Sub32 temp definitions are folded into a
   single "base temp + constant" form:
     't1 = Add32/Sub32(t2, con)'  -> 't1 = Add32/Sub32(base, con')'
     't1 = GetI[t2, con]'         -> 't1 = GetI[base, con']'
     'PutI[t, con] = d'           -> 'PutI[base, con'] = d'
   bb is modified in-place. */
static void collapse_AddSub_chains_BB ( IRSB* bb )
{
   IRStmt *st;
   IRTemp var, var2;
   Int    i, con, con2;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */

      if (st->tag == Ist_WrTmp
          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {

         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
            Find out if var can be expressed as var2 + con2. */
         if (collapseChain(bb, i-1, var, &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing1 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += con;
            /* Emit Add32 for non-negative totals, Sub32 otherwise,
               so the constant operand is always non-negative. */
            bb->stmts[i]
               = IRStmt_WrTmp(
                    st->Ist.WrTmp.tmp,
                    (con2 >= 0)
                      ? IRExpr_Binop(Iop_Add32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(con2)))
                      : IRExpr_Binop(Iop_Sub32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(-con2)))
                 );
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }

         continue;
      }

      /* Try to collapse 't1 = GetI[t2, con]'. */

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing3 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         /* fold the collapsed constant into the GetI bias */
         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
         bb->stmts[i]
            = IRStmt_WrTmp(
                 st->Ist.WrTmp.tmp,
                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
                             IRExpr_RdTmp(var2),
                             con2));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

      /* Perhaps st is PutI[t, con] ? */

      if (st->tag == Ist_PutI
          && st->Ist.PutI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
                               &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing2 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         /* fold the collapsed constant into the PutI bias */
         con2 += st->Ist.PutI.bias;
         bb->stmts[i]
           = IRStmt_PutI(st->Ist.PutI.descr,
                         IRExpr_RdTmp(var2),
                         con2,
                         st->Ist.PutI.data);
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

   } /* for */
}
2982
2983
2984/*---------------------------------------------------------------*/
2985/*--- PutI/GetI transformations                               ---*/
2986/*---------------------------------------------------------------*/
2987
2988/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
2989   the given starting point to find, if any, a PutI which writes
2990   exactly the same piece of guest state, and so return the expression
2991   that the PutI writes.  This is the core of PutI-GetI forwarding. */
2992
/* Scan backwards from bb->stmts[startHere] looking for a PutI that
   writes exactly the guest-state slice (descrG, ixG, biasG).  Returns
   the stored expression on an ExactAlias hit, or NULL if the search
   is blocked by a possibly-overlapping write, a guest-touching dirty
   call, or the start of the block. */
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       st->Ist.PutI.descr,
                       st->Ist.PutI.ix,
                       st->Ist.PutI.bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return st->Ist.PutI.data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
3083
3084
3085
3086/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3087   that it writes exactly the same piece of guest state) ?  Safe
3088   answer: False. */
3089
3090static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3091{
3092   vassert(pi->tag == Ist_PutI);
3093   if (s2->tag != Ist_PutI)
3094      return False;
3095
3096   return toBool(
3097          getAliasingRelation_II(
3098             pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3099             s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3100          )
3101          == ExactAlias
3102          );
3103}
3104
3105
3106/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3107   overlap it?  Safe answer: True.  Note, we could do a lot better
3108   than this if needed. */
3109
/* Assuming 'pi' is a PutI stmt: might 's2' be a guest-state access
   (Get/GetI/Put/PutI) that overlaps the slice pi writes?  Safe
   (conservative) answer: True.  Non-guest-state statements answer
   False; barriers/hints/CAS answer True out of paranoia. */
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);
   /* NOTE(review): minoffP/maxoffP are computed here but not read
      anywhere below — looks like a leftover; confirm getArrayBounds
      has no needed side effects before removing. */
   getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* touch no guest state at all */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         /* fixed-offset write vs pi's indexed write */
         relation
            = getAliasingRelation_IC(
                 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI:
         vassert(isIRAtom(s2->Ist.PutI.ix));
         vassert(isIRAtom(s2->Ist.PutI.data));
         /* indexed write vs indexed write */
         relation
            = getAliasingRelation_II(
                 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
                 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
              );
         goto have_relation;

      case Ist_WrTmp:
         /* only interesting if the RHS reads guest state */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    pi->Ist.PutI.descr, pi->Ist.PutI.ix,
                                        pi->Ist.PutI.bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    pi->Ist.PutI.descr, pi->Ist.PutI.ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* memory write; no guest-state overlap possible */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
3204
3205
3206
3207/* ---------- PutI/GetI transformations main functions --------- */
3208
3209/* Remove redundant GetIs, to the extent that they can be detected.
3210   bb is modified in-place. */
3211
3212static
3213void do_redundant_GetI_elimination ( IRSB* bb )
3214{
3215   Int     i;
3216   IRStmt* st;
3217
3218   for (i = bb->stmts_used-1; i >= 0; i--) {
3219      st = bb->stmts[i];
3220      if (st->tag == Ist_NoOp)
3221         continue;
3222
3223      if (st->tag == Ist_WrTmp
3224          && st->Ist.WrTmp.data->tag == Iex_GetI
3225          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3226         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3227         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
3228         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
3229         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
3230         if (replacement
3231             && isIRAtom(replacement)
3232             /* Make sure we're doing a type-safe transformation! */
3233             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3234            if (DEBUG_IROPT) {
3235               vex_printf("rGI:  ");
3236               ppIRExpr(st->Ist.WrTmp.data);
3237               vex_printf(" -> ");
3238               ppIRExpr(replacement);
3239               vex_printf("\n");
3240            }
3241            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3242         }
3243      }
3244   }
3245
3246}
3247
3248
3249/* Remove redundant PutIs, to the extent which they can be detected.
3250   bb is modified in-place. */
3251
3252static
3253void do_redundant_PutI_elimination ( IRSB* bb )
3254{
3255   Int    i, j;
3256   Bool   delete;
3257   IRStmt *st, *stj;
3258
3259   for (i = 0; i < bb->stmts_used; i++) {
3260      st = bb->stmts[i];
3261      if (st->tag != Ist_PutI)
3262         continue;
3263      /* Ok, search forwards from here to see if we can find another
3264         PutI which makes this one redundant, and dodging various
3265         hazards.  Search forwards:
3266         * If conditional exit, give up (because anything after that
3267           does not postdominate this put).
3268         * If a Get which might overlap, give up (because this PutI
3269           not necessarily dead).
3270         * If a Put which is identical, stop with success.
3271         * If a Put which might overlap, but is not identical, give up.
3272         * If a dirty helper call which might write guest state, give up.
3273         * If a Put which definitely doesn't overlap, or any other
3274           kind of stmt, continue.
3275      */
3276      delete = False;
3277      for (j = i+1; j < bb->stmts_used; j++) {
3278         stj = bb->stmts[j];
3279         if (stj->tag == Ist_NoOp)
3280            continue;
3281         if (identicalPutIs(st, stj)) {
3282            /* success! */
3283            delete = True;
3284            break;
3285         }
3286         if (stj->tag == Ist_Exit)
3287            /* give up */
3288            break;
3289         if (st->tag == Ist_Dirty)
3290            /* give up; could do better here */
3291            break;
3292         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
3293            /* give up */
3294           break;
3295      }
3296
3297      if (delete) {
3298         if (DEBUG_IROPT) {
3299            vex_printf("rPI:  ");
3300            ppIRStmt(st);
3301            vex_printf("\n");
3302         }
3303         bb->stmts[i] = IRStmt_NoOp();
3304      }
3305
3306   }
3307}
3308
3309
3310/*---------------------------------------------------------------*/
3311/*--- Loop unrolling                                          ---*/
3312/*---------------------------------------------------------------*/
3313
3314/* Adjust all tmp values (names) in e by delta.  e is destructively
3315   modified. */
3316
/* Recursively add 'delta' to every IRTemp number mentioned in 'e'.
   e is destructively modified.  Used when concatenating unrolled
   copies of a block, whose temps must not collide. */
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         /* mention no temporaries */
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         deltaIRExpr(e->Iex.Qop.arg1, delta);
         deltaIRExpr(e->Iex.Qop.arg2, delta);
         deltaIRExpr(e->Iex.Qop.arg3, delta);
         deltaIRExpr(e->Iex.Qop.arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.arg1, delta);
         deltaIRExpr(e->Iex.Triop.arg2, delta);
         deltaIRExpr(e->Iex.Triop.arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         /* args[] is a NULL-terminated vector */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_Mux0X:
         deltaIRExpr(e->Iex.Mux0X.cond, delta);
         deltaIRExpr(e->Iex.Mux0X.expr0, delta);
         deltaIRExpr(e->Iex.Mux0X.exprX, delta);
         break;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
3365
3366/* Adjust all tmp values (names) in st by delta.  st is destructively
3367   modified. */
3368
/* Add 'delta' to every IRTemp number mentioned in 'st', both those
   defined by it and those read by its sub-expressions.  st is
   destructively modified. */
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* mention no temporaries */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.ix, delta);
         deltaIRExpr(st->Ist.PutI.data, delta);
         break;
      case Ist_WrTmp:
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_CAS:
         /* oldHi is optional; IRTemp_INVALID means "not used" */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         /* expdHi/dataHi may be NULL */
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata is NULL for a load-linked */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         /* args[] is a NULL-terminated vector */
         for (i = 0; d->args[i]; i++)
            deltaIRExpr(d->args[i], delta);
         /* tmp is optional; IRTemp_INVALID means "no result" */
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
3433
3434
3435/* If possible, return a loop-unrolled version of bb0.  The original
3436   is changed.  If not possible, return NULL.  */
3437
3438/* The two schemas considered are:
3439
3440     X: BODY; goto X
3441
3442     which unrolls to (eg)  X: BODY;BODY; goto X
3443
3444   and
3445
3446       X: BODY; if (c) goto X; goto Y
3447   which trivially transforms to
3448       X: BODY; if (!c) goto Y; goto X;
3449   so it falls in the scope of the first case.
3450
3451   X and Y must be literal (guest) addresses.
3452*/
3453
3454static Int calc_unroll_factor( IRSB* bb )
3455{
3456   Int n_stmts, i;
3457
3458   n_stmts = 0;
3459   for (i = 0; i < bb->stmts_used; i++) {
3460      if (bb->stmts[i]->tag != Ist_NoOp)
3461         n_stmts++;
3462   }
3463
3464   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
3465      if (vex_control.iropt_verbosity > 0)
3466         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
3467                    n_stmts, 8* n_stmts);
3468      return 8;
3469   }
3470   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
3471      if (vex_control.iropt_verbosity > 0)
3472         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
3473                    n_stmts, 4* n_stmts);
3474      return 4;
3475   }
3476
3477   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
3478      if (vex_control.iropt_verbosity > 0)
3479         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
3480                    n_stmts, 2* n_stmts);
3481      return 2;
3482   }
3483
3484   if (vex_control.iropt_verbosity > 0)
3485      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
3486
3487   return 1;
3488}
3489
3490
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* A non-positive threshold means unrolling is disabled. */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   /* The literal fall-through target found above is idiom #2's Y. */
   yyy_value = xxx_value;
   /* Find the last non-empty statement slot. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   /* The side-exit target is idiom #2's candidate X. */
   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Each pass of this loop doubles bb1, so jmax passes yield the
      required unroll factor (2^jmax copies of the body). */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      /* Allocate fresh temps in bb1 mirroring those used by the copy;
         the copy's temp references are then shifted (below) so they
         refer to the fresh ones. */
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%llx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
3661
3662
3663/*---------------------------------------------------------------*/
3664/*--- The tree builder                                        ---*/
3665/*---------------------------------------------------------------*/
3666
3667/* This isn't part of IR optimisation.  Really it's a pass done prior
3668   to instruction selection, which improves the code that the
3669   instruction selector can produce. */
3670
3671/* --- The 'tmp' environment is the central data structure here --- */
3672
/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  The current setting of
   A_NENV (10) gives reasonable performance under most
   circumstances. */
3678#define A_NENV 10
3679
3680/* bindee == NULL   ===  slot is not in use
3681   bindee != NULL   ===  slot is in use
3682*/
typedef
   struct {
      IRTemp  binder;   /* the temp being bound */
      IRExpr* bindee;   /* expression it is bound to; NULL == slot free */
      Bool    doesLoad; /* does bindee contain an Iex_Load? */
      Bool    doesGet;  /* does bindee contain an Iex_Get/Iex_GetI? */
   }
   ATmpInfo;
3691
3692__attribute__((unused))
3693static void ppAEnv ( ATmpInfo* env )
3694{
3695   Int i;
3696   for (i = 0; i < A_NENV; i++) {
3697      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
3698      if (env[i].bindee)
3699         ppIRExpr(env[i].bindee);
3700      else
3701         vex_printf("(null)");
3702      vex_printf("\n");
3703   }
3704}
3705
3706/* --- Tree-traversal fns --- */
3707
3708/* Traverse an expr, and detect if any part of it reads memory or does
3709   a Get.  Be careful ... this really controls how much the
3710   tree-builder can reorder the code, so getting it right is critical.
3711*/
/* Note: the two flags are only ever set to True, never cleared, so
   the caller is responsible for initialising them to False. */
static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         /* args is a NULL-terminated vector */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
         return;
      case Iex_Mux0X:
         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         /* the load itself, plus whatever the address computation does */
         *doesLoad = True;
         setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
         return;
      case Iex_Get:
         *doesGet = True;
         return;
      case Iex_GetI:
         /* reads guest state; the index expression may do more */
         *doesGet = True;
         setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
         return;
      case Iex_RdTmp:
      case Iex_Const:
         /* leaves: read neither memory nor guest state */
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
3762
3763
3764/* Add a binding to the front of the env and slide all the rest
3765   backwards.  It should be the case that the last slot is free. */
3766static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
3767{
3768   Int i;
3769   vassert(env[A_NENV-1].bindee == NULL);
3770   for (i = A_NENV-1; i >= 1; i--)
3771      env[i] = env[i-1];
3772   env[0].binder   = binder;
3773   env[0].bindee   = bindee;
3774   env[0].doesLoad = False; /* filled in later */
3775   env[0].doesGet  = False; /* filled in later */
3776}
3777
/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this expression
   to the uses array.
*/
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int i;

   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      /* All remaining cases just recurse into subexpressions. */
      case Iex_Mux0X:
         aoccCount_Expr(uses, e->Iex.Mux0X.cond);
         aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
         aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.arg1);
         aoccCount_Expr(uses, e->Iex.Qop.arg2);
         aoccCount_Expr(uses, e->Iex.Qop.arg3);
         aoccCount_Expr(uses, e->Iex.Qop.arg4);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.arg1);
         aoccCount_Expr(uses, e->Iex.Triop.arg2);
         aoccCount_Expr(uses, e->Iex.Triop.arg3);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_CCall:
         /* args is a NULL-terminated vector */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_Const:
      case Iex_Get:
         /* leaves: contain no temp uses */
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
    }
}
3842
3843
/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this statement
   to the uses array.
*/
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         /* Only the RHS counts; writing a temp is not a use of it. */
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.ix);
         aoccCount_Expr(uses, st->Ist.PutI.data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-width CAS. */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         /* mAddr is only meaningful when a memory effect is declared. */
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++)
            aoccCount_Expr(uses, d->args[i]);
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* no expressions to scan */
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
3907
3908/* Look up a binding for tmp in the env.  If found, return the bound
3909   expression, and set the env's binding to NULL so it is marked as
3910   used.  If not found, return NULL. */
3911
3912static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
3913{
3914   Int i;
3915   for (i = 0; i < A_NENV; i++) {
3916      if (env[i].binder == tmp && env[i].bindee != NULL) {
3917         IRExpr* bindee = env[i].bindee;
3918         env[i].bindee = NULL;
3919         return bindee;
3920      }
3921   }
3922   return NULL;
3923}
3924
3925/* Traverse e, looking for temps.  For each observed temp, see if env
3926   contains a binding for the temp, and if so return the bound value.
3927   The env has the property that any binding it holds is
3928   'single-shot', so once a binding is used, it is marked as no longer
3929   available, by setting its .bindee field to NULL. */
3930
3931static inline Bool is_Unop ( IRExpr* e, IROp op ) {
3932   return e->tag == Iex_Unop && e->Iex.Unop.op == op;
3933}
3934static inline Bool is_Binop ( IRExpr* e, IROp op ) {
3935   return e->tag == Iex_Binop && e->Iex.Binop.op == op;
3936}
3937
3938static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
3939{
3940   switch (op) {
3941   case Iop_Or32:
3942      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
3943      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
3944         return IRExpr_Unop( Iop_CmpwNEZ32,
3945                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
3946                                                     a2->Iex.Unop.arg ) );
3947      break;
3948   default:
3949      break;
3950   }
3951   /* no reduction rule applies */
3952   return IRExpr_Binop( op, a1, a2 );
3953}
3954
/* Build a unop node, first trying the pattern-based simplification
   rules below (each documented inline).  Some rules recurse so a
   rewrite can expose further opportunities.  If nothing applies,
   just build Unop(op, aa) unchanged. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;


   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
4050
/* Substitute env bindings into e, consuming each binding as it is
   used (via atbSubst_Temp) and applying the fold_IRExpr_* rules
   while rebuilding the tree.  NOTE(review): the recursive calls in
   argument positions mutate env, and C does not specify argument
   evaluation order; this appears to rely on each binding being
   single-shot -- confirm before restructuring. */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         /* shallow-copy the NULL-terminated arg vector so the
            original expression is left untouched */
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         /* the interesting case: replace the temp by its binding,
            if one is available */
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_Mux0X:
         return IRExpr_Mux0X(
                   atbSubst_Expr(env, e->Iex.Mux0X.cond),
                   atbSubst_Expr(env, e->Iex.Mux0X.expr0),
                   atbSubst_Expr(env, e->Iex.Mux0X.exprX)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.op,
                   atbSubst_Expr(env, e->Iex.Qop.arg1),
                   atbSubst_Expr(env, e->Iex.Qop.arg2),
                   atbSubst_Expr(env, e->Iex.Qop.arg3),
                   atbSubst_Expr(env, e->Iex.Qop.arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.op,
                   atbSubst_Expr(env, e->Iex.Triop.arg1),
                   atbSubst_Expr(env, e->Iex.Triop.arg2),
                   atbSubst_Expr(env, e->Iex.Triop.arg3)
                );
      case Iex_Binop:
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         /* leaves: nothing to substitute */
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
4123
4124/* Same deal as atbSubst_Expr, except for stmts. */
4125
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         return IRStmt_PutI(
                   st->Ist.PutI.descr,
                   atbSubst_Expr(env, st->Ist.PutI.ix),
                   st->Ist.PutI.bias,
                   atbSubst_Expr(env, st->Ist.PutI.data)
                );

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* rebuild with substituted operand exprs; the temps and
            endianness are carried over unchanged.  expdHi/dataHi
            are NULL for single-width CAS. */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         /* storedata is NULL for a load-linked */
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* copy the details struct wholesale, then substitute into
            its expression-valued fields in place */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = atbSubst_Expr(env, d2->args[i]);
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
4208
4209/* notstatic */ void ado_treebuild_BB ( IRSB* bb )
4210{
4211   Int      i, j, k, m;
4212   Bool     stmtPuts, stmtStores, invalidateMe;
4213   IRStmt*  st;
4214   IRStmt*  st2;
4215   ATmpInfo env[A_NENV];
4216
4217   Int       n_tmps = bb->tyenv->types_used;
4218   UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));
4219
4220   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
4221      temp.  Also count occurrences in the bb->next field. */
4222
4223   for (i = 0; i < n_tmps; i++)
4224      uses[i] = 0;
4225
4226   for (i = 0; i < bb->stmts_used; i++) {
4227      st = bb->stmts[i];
4228      if (st->tag == Ist_NoOp)
4229         continue;
4230      aoccCount_Stmt( uses, st );
4231   }
4232   aoccCount_Expr(uses, bb->next );
4233
4234#  if 0
4235   for (i = 0; i < n_tmps; i++) {
4236      if (uses[i] == 0)
4237        continue;
4238      ppIRTemp( (IRTemp)i );
4239      vex_printf("  used %d\n", (Int)uses[i] );
4240   }
4241#  endif
4242
4243   /* Phase 2.  Scan forwards in bb.  For each statement in turn:
4244
4245         If the env is full, emit the end element.  This guarantees
4246         there is at least one free slot in the following.
4247
4248         On seeing 't = E', occ(t)==1,
4249            let E'=env(E)
4250            delete this stmt
4251            add t -> E' to the front of the env
4252            Examine E' and set the hints for E' appropriately
4253              (doesLoad? doesGet?)
4254
4255         On seeing any other stmt,
4256            let stmt' = env(stmt)
4257            remove from env any 't=E' binds invalidated by stmt
4258                emit the invalidated stmts
4259            emit stmt'
4260            compact any holes in env
4261              by sliding entries towards the front
4262
4263      Finally, apply env to bb->next.
4264   */
4265
4266   for (i = 0; i < A_NENV; i++) {
4267      env[i].bindee = NULL;
4268      env[i].binder = IRTemp_INVALID;
4269   }
4270
4271   /* The stmts in bb are being reordered, and we are guaranteed to
4272      end up with no more than the number we started with.  Use i to
4273      be the cursor of the current stmt examined and j <= i to be that
4274      for the current stmt being written.
4275   */
4276   j = 0;
4277   for (i = 0; i < bb->stmts_used; i++) {
4278
4279      st = bb->stmts[i];
4280      if (st->tag == Ist_NoOp)
4281         continue;
4282
4283      /* Ensure there's at least one space in the env, by emitting
4284         the oldest binding if necessary. */
4285      if (env[A_NENV-1].bindee != NULL) {
4286         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
4287                                      env[A_NENV-1].bindee );
4288         j++;
4289         vassert(j <= i);
4290         env[A_NENV-1].bindee = NULL;
4291      }
4292
4293      /* Consider current stmt. */
4294      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
4295         IRExpr *e, *e2;
4296
4297         /* optional extra: dump dead bindings as we find them.
4298            Removes the need for a prior dead-code removal pass. */
4299         if (uses[st->Ist.WrTmp.tmp] == 0) {
4300	    if (0) vex_printf("DEAD binding\n");
4301            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4302         }
4303         vassert(uses[st->Ist.WrTmp.tmp] == 1);
4304
4305         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
4306            actions. */
4307         e  = st->Ist.WrTmp.data;
4308         e2 = atbSubst_Expr(env, e);
4309         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
4310         setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
4311         /* don't advance j, as we are deleting this stmt and instead
4312            holding it temporarily in the env. */
4313         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4314      }
4315
4316      /* we get here for any other kind of statement. */
4317      /* 'use up' any bindings required by the current statement. */
4318      st2 = atbSubst_Stmt(env, st);
4319
4320      /* Now, before this stmt, dump any bindings in env that it
4321         invalidates.  These need to be dumped in the order in which
4322         they originally entered env -- that means from oldest to
4323         youngest. */
4324
4325      /* stmtPuts/stmtStores characterise what the stmt under
4326         consideration does, or might do (sidely safe @ True). */
4327      stmtPuts
4328         = toBool( st->tag == Ist_Put
4329                   || st->tag == Ist_PutI
4330                   || st->tag == Ist_Dirty );
4331
4332      /* be True if this stmt writes memory or might do (==> we don't
4333         want to reorder other loads or stores relative to it).  Also,
4334         both LL and SC fall under this classification, since we
4335         really ought to be conservative and not reorder any other
4336         memory transactions relative to them. */
4337      stmtStores
4338         = toBool( st->tag == Ist_Store
4339                   || st->tag == Ist_Dirty
4340                   || st->tag == Ist_LLSC );
4341
4342      for (k = A_NENV-1; k >= 0; k--) {
4343         if (env[k].bindee == NULL)
4344            continue;
4345         /* Compare the actions of this stmt with the actions of
4346            binding 'k', to see if they invalidate the binding. */
4347         invalidateMe
4348            = toBool(
4349              /* a store invalidates loaded data */
4350              (env[k].doesLoad && stmtStores)
4351              /* a put invalidates get'd data */
4352              || (env[k].doesGet && stmtPuts)
4353              /* a put invalidates loaded data.  Note, we could do
4354                 much better here in the sense that we only need to
4355                 invalidate trees containing loads if the Put in
4356                 question is marked as requiring precise
4357                 exceptions. */
4358              || (env[k].doesLoad && stmtPuts)
4359              /* probably overly conservative: a memory bus event
4360                 invalidates absolutely everything, so that all
4361                 computation prior to it is forced to complete before
4362                 proceeding with the event (fence,lock,unlock). */
4363              || st->tag == Ist_MBE
4364              /* also be (probably overly) paranoid re AbiHints */
4365              || st->tag == Ist_AbiHint
4366              );
4367         if (invalidateMe) {
4368            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
4369            j++;
4370            vassert(j <= i);
4371            env[k].bindee = NULL;
4372         }
4373      }
4374
4375      /* Slide in-use entries in env up to the front */
4376      m = 0;
4377      for (k = 0; k < A_NENV; k++) {
4378         if (env[k].bindee != NULL) {
4379            env[m] = env[k];
4380            m++;
4381	 }
4382      }
4383      for (m = m; m < A_NENV; m++) {
4384         env[m].bindee = NULL;
4385      }
4386
4387      /* finally, emit the substituted statement */
4388      bb->stmts[j] = st2;
4389      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
4390      j++;
4391
4392      vassert(j <= i+1);
4393   } /* for each stmt in the original bb ... */
4394
4395   /* Finally ... substitute the ->next field as much as possible, and
4396      dump any left-over bindings.  Hmm.  Perhaps there should be no
4397      left over bindings?  Or any left-over bindings are
4398      by definition dead? */
4399   bb->next = atbSubst_Expr(env, bb->next);
4400   bb->stmts_used = j;
4401}
4402
4403
4404/*---------------------------------------------------------------*/
4405/*--- iropt main                                              ---*/
4406/*---------------------------------------------------------------*/
4407
4408static Bool iropt_verbose = False; /* True; */
4409
4410
4411/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
4412   redundant Put removal, constant propagation, dead code removal,
4413   clean helper specialisation, and dead code removal (again).
4414*/
4415
4416
4417static
4418IRSB* cheap_transformations (
4419         IRSB* bb,
4420         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4421         Bool (*preciseMemExnsFn)(Int,Int)
4422      )
4423{
4424   redundant_get_removal_BB ( bb );
4425   if (iropt_verbose) {
4426      vex_printf("\n========= REDUNDANT GET\n\n" );
4427      ppIRSB(bb);
4428   }
4429
4430   redundant_put_removal_BB ( bb, preciseMemExnsFn );
4431   if (iropt_verbose) {
4432      vex_printf("\n========= REDUNDANT PUT\n\n" );
4433      ppIRSB(bb);
4434   }
4435
4436   bb = cprop_BB ( bb );
4437   if (iropt_verbose) {
4438      vex_printf("\n========= CPROPD\n\n" );
4439      ppIRSB(bb);
4440   }
4441
4442   do_deadcode_BB ( bb );
4443   if (iropt_verbose) {
4444      vex_printf("\n========= DEAD\n\n" );
4445      ppIRSB(bb);
4446   }
4447
4448   bb = spec_helpers_BB ( bb, specHelper );
4449   do_deadcode_BB ( bb );
4450   if (iropt_verbose) {
4451      vex_printf("\n========= SPECd \n\n" );
4452      ppIRSB(bb);
4453   }
4454
4455   return bb;
4456}
4457
4458
4459/* Do some more expensive transformations on bb, which are aimed at
4460   optimising as much as possible in the presence of GetI and PutI.  */
4461
static
IRSB* expensive_transformations( IRSB* bb )
{
   /* Common up subexpressions first; only the pass's side effect on
      'bb' is wanted here, not its Bool result. */
   (void)do_cse_BB( bb );
   /* Canonicalise chains of Add/Sub so that the GetI/PutI
      redundancy analyses below see comparable index expressions. */
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   do_redundant_PutI_elimination( bb );
   /* Sweep up anything the preceding passes left dead. */
   do_deadcode_BB( bb );
   return bb;
}
4472
4473
4474/* Scan a flattened BB to look for signs that more expensive
4475   optimisations might be useful:
4476   - find out if there are any GetIs and PutIs
4477   - find out if there are any floating or vector-typed temporaries
4478*/
4479
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* Walk every statement once.  As a side effect, the vasserts
      below also check that the block really is flat, i.e. that all
      leaf positions hold IR atoms. */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            /* A GetI can only appear as the RHS of a WrTmp in flat
               code, so this case covers the 'Get' half of the
               GetI-or-PutI test. */
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Classify the written temp: integer types are ignored,
               float/vector types set the second flag, anything else
               is unexpected. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F32: case Ity_F64: case Ity_V128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_CAS:
            /* The Hi halves are only present for double-word CAS;
               NULL means a single-word CAS. */
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            /* storedata is NULL for a Load-Linked, non-NULL for a
               Store-Conditional. */
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            /* args is a NULL-terminated vector. */
            for (j = 0; d->args[j]; j++)
               vassert(isIRAtom(d->args[j]));
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         /* Also reached from the WrTmp type check above. */
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
4558
4559
4560/* ---------------- The main iropt entry point. ---------------- */
4561
4562/* exported from this file */
4563/* Rules of the game:
4564
4565   - IRExpr/IRStmt trees should be treated as immutable, as they
4566     may get shared.  So never change a field of such a tree node;
4567     instead construct and return a new one if needed.
4568*/
4569
4570
/* Optimise one IR superblock.  Flattens bb0, then applies the cheap
   cleanup passes and, depending on vex_control.iropt_level and on the
   block's contents, the expensive passes and loop unrolling as well.

   specHelper       - front-end helper-call specialiser, passed through
                      to the cleanup passes
   preciseMemExnsFn - tells redundant-Put removal which guest state
                      offsets require precise exceptions
   guest_addr       - guest address of the block, used by the loop
                      unroller
   guest_arch       - consulted only to enable extra cleanup for ARM

   Returns the optimised block. */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int),
         Addr64 guest_addr,
         VexArch guest_arch
      )
{
   /* Counters: blocks processed, and blocks that needed the
      expensive passes.  Reported only when DEBUG_IROPT is set. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      redundant_put_removal_BB ( bb, preciseMemExnsFn );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb );
         bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb );
         /* If CSE found anything to common up, clean up again since
            that may have exposed more cheap opportunities. */
         if (cses)
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
      }

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      /* maybe_loop_unroll_BB returns NULL when it declines to
         unroll; in that case 'bb' is left as-is. */
      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb );
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
4667
4668
4669/*---------------------------------------------------------------*/
4670/*--- end                                            ir_opt.c ---*/
4671/*---------------------------------------------------------------*/
4672