1
2/*---------------------------------------------------------------*/
3/*--- begin                                          ir_opt.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2011 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39
40#include "main_util.h"
41#include "main_globals.h"
42#include "ir_opt.h"
43
44
45/* Set to 1 for lots of debugging output. */
46#define DEBUG_IROPT 0
47
48
49/* What iropt does, 29 Dec 04.
50
51   It takes an IRSB and produces a new one with the same meaning,
52   defined thus:
53
54   After execution of the new BB, all guest state and guest memory is
55   the same as after execution of the original.  This is true
56   regardless of how the block was exited (at the end vs side exit).
57
58   In addition, parts of the guest state will be identical to that
59   created by execution of the original at the following observation
60   points:
61
62   * In a dirty helper call, any parts of the guest state that the
63     helper states that it reads or modifies will be up to date.
64     Also, guest memory will be up to date.  Parts of the guest state
65     not marked as being read or modified by the helper cannot be
66     assumed to be up-to-date at the point where the helper is called.
67
68   * Immediately prior to any load or store, those parts of the guest
69     state marked as requiring precise exceptions will be up to date.
70     Also, guest memory will be up to date.  Parts of the guest state
71     not marked as requiring precise exceptions cannot be assumed to
72     be up-to-date at the point of the load/store.
73
74   The relative order of loads and stores (including loads/stores of
75   guest memory done by dirty helpers annotated as such) is not
76   changed.  However, the relative order of loads with no intervening
77   stores/modifies may be changed.
78
79   Transformation order
80   ~~~~~~~~~~~~~~~~~~~~
81
82   There are three levels of optimisation, controlled by
83   vex_control.iropt_level.  Define first:
84
85   "Cheap transformations" are the following sequence:
86      * Redundant-Get removal
87      * Redundant-Put removal
88      * Constant propagation/folding
89      * Dead code removal
90      * Specialisation of clean helper functions
91      * Dead code removal
92
93   "Expensive transformations" are the following sequence:
94      * CSE
95      * Folding of add/sub chains
96      * Redundant-GetI removal
97      * Redundant-PutI removal
98      * Dead code removal
99
100   Then the transformations are as follows, as defined by
101   vex_control.iropt_level:
102
103   Level 0:
104      * Flatten into atomic form.
105
106   Level 1: the following sequence:
107      * Flatten into atomic form.
108      * Cheap transformations.
109
110   Level 2: the following sequence
111      * Flatten into atomic form.
112      * Cheap transformations.
113      * If block contains any floating or vector types, CSE.
114      * If block contains GetI or PutI, Expensive transformations.
115      * Try unrolling loops.  Three possible outcomes:
116        - No effect: do nothing more.
117        - Unrolled a loop, and block does not contain GetI or PutI:
118          Do: * CSE
119              * Dead code removal
120        - Unrolled a loop, and block contains GetI or PutI:
121          Do: * Expensive transformations
122              * Cheap transformations
123*/
124
125/* Implementation notes, 29 Dec 04.
126
127   TODO (important): I think rPutI removal ignores precise exceptions
128   and is therefore in a sense, wrong.  In the sense that PutIs are
129   assumed not to write parts of the guest state that we need to have
130   up-to-date at loads/stores.  So far on x86 guest that has not
131   mattered since indeed only the x87 FP registers and tags are
132   accessed using GetI/PutI, and there is no need so far for them to
133   be up to date at mem exception points.  The rPutI pass should be
134   fixed.
135
136   TODO: improve pessimistic handling of precise exceptions
137     in the tree builder.
138
139   TODO: check interaction of rGetI and dirty helpers.
140
141   F64i constants are treated differently from other constants.
142   They are not regarded as atoms, and instead lifted off and
143   bound to temps.  This allows them to participate in CSE, which
144   is important for getting good performance for x86 guest code.
145
146   CSE up F64 literals (already doing F64is)
147
148   CSE: consider carefully the requirement for precise exns
149        prior to making CSE any more aggressive.  */
150
151
152/*---------------------------------------------------------------*/
153/*--- Finite mappery, of a sort                               ---*/
154/*---------------------------------------------------------------*/
155
/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */
159
typedef
   struct {
      Bool*  inuse;  /* liveness flag per slot; False marks a deleted entry */
      HWord* key;
      HWord* val;
      Int    size;   /* capacity of the three parallel arrays */
      Int    used;   /* number of slots allocated so far (live or dead) */
   }
   HashHW;
169
170static HashHW* newHHW ( void )
171{
172   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
173   h->size   = 8;
174   h->used   = 0;
175   h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
176   h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
177   h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
178   return h;
179}
180
181
182/* Look up key in the map. */
183
184static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
185{
186   Int i;
187   /* vex_printf("lookupHHW(%llx)\n", key ); */
188   for (i = 0; i < h->used; i++) {
189      if (h->inuse[i] && h->key[i] == key) {
190         if (val)
191            *val = h->val[i];
192         return True;
193      }
194   }
195   return False;
196}
197
198
199/* Add key->val to the map.  Replaces any existing binding for key. */
200
201static void addToHHW ( HashHW* h, HWord key, HWord val )
202{
203   Int i, j;
204   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
205
206   /* Find and replace existing binding, if any. */
207   for (i = 0; i < h->used; i++) {
208      if (h->inuse[i] && h->key[i] == key) {
209         h->val[i] = val;
210         return;
211      }
212   }
213
214   /* Ensure a space is available. */
215   if (h->used == h->size) {
216      /* Copy into arrays twice the size. */
217      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
218      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
219      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
220      for (i = j = 0; i < h->size; i++) {
221         if (!h->inuse[i]) continue;
222         inuse2[j] = True;
223         key2[j] = h->key[i];
224         val2[j] = h->val[i];
225         j++;
226      }
227      h->used = j;
228      h->size *= 2;
229      h->inuse = inuse2;
230      h->key = key2;
231      h->val = val2;
232   }
233
234   /* Finally, add it. */
235   vassert(h->used < h->size);
236   h->inuse[h->used] = True;
237   h->key[h->used] = key;
238   h->val[h->used] = val;
239   h->used++;
240}
241
242
243/*---------------------------------------------------------------*/
244/*--- Flattening out a BB into atomic SSA form                ---*/
245/*---------------------------------------------------------------*/
246
247/* Non-critical helper, heuristic for reducing the number of tmp-tmp
248   copies made by flattening.  If in doubt return False. */
249
250static Bool isFlat ( IRExpr* e )
251{
252   if (e->tag == Iex_Get)
253      return True;
254   if (e->tag == Iex_Binop)
255      return toBool( isIRAtom(e->Iex.Binop.arg1)
256                     && isIRAtom(e->Iex.Binop.arg2) );
257   if (e->tag == Iex_Load)
258      return isIRAtom(e->Iex.Load.addr);
259   return False;
260}
261
262/* Flatten out 'ex' so it is atomic, returning a new expression with
263   the same value, after having appended extra IRTemp assignments to
264   the end of 'bb'. */
265
/* Flatten 'ex': every compound case recursively flattens its
   subexpressions, binds the rebuilt expression to a fresh temp via a
   WrTmp appended to 'bb', and returns a RdTmp of that temp (an atom).
   Only RdTmps and non-F64i constants are returned unchanged. */
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      /* Get has no subexpressions; simply bind it to a temp. */
      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(ex->Iex.Qop.op,
                         flatten_Expr(bb, ex->Iex.Qop.arg1),
                         flatten_Expr(bb, ex->Iex.Qop.arg2),
                         flatten_Expr(bb, ex->Iex.Qop.arg3),
                         flatten_Expr(bb, ex->Iex.Qop.arg4))));
         return IRExpr_RdTmp(t1);

      case Iex_Triop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(ex->Iex.Triop.op,
                         flatten_Expr(bb, ex->Iex.Triop.arg1),
                         flatten_Expr(bb, ex->Iex.Triop.arg2),
                         flatten_Expr(bb, ex->Iex.Triop.arg3))));
         return IRExpr_RdTmp(t1);

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      /* Shallow-copy the NULL-terminated args vector first, so the
         original expression is not mutated, then flatten each arg. */
      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_Mux0X:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
                         flatten_Expr(bb, ex->Iex.Mux0X.expr0),
                         flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      /* Already atomic. */
      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
373
374
375/* Append a completely flattened form of 'st' to the end of 'bb'. */
376
/* Append a completely flattened form of 'st' to the end of 'bb':
   every subexpression of the rebuilt statement is an atom, with any
   needed intermediate WrTmps appended first by flatten_Expr. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d,  *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         e1 = flatten_Expr(bb, st->Ist.PutI.ix);
         e2 = flatten_Expr(bb, st->Ist.PutI.data);
         addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
                                       e1,
                                       st->Ist.PutI.bias,
                                       e2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for a single-width CAS; preserve
            that by only flattening them when present. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata is NULL for a load-linked; non-NULL means
            store-conditional. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Work on a copy of the details so the original statement is
            not mutated. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = flatten_Expr(bb, d2->args[i]);
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      /* These carry no expressions; copy them through unchanged. */
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                           st->Ist.Exit.dst));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
475
476
477static IRSB* flatten_BB ( IRSB* in )
478{
479   Int   i;
480   IRSB* out;
481   out = emptyIRSB();
482   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
483   for (i = 0; i < in->stmts_used; i++)
484      if (in->stmts[i])
485         flatten_Stmt( out, in->stmts[i] );
486   out->next     = flatten_Expr( out, in->next );
487   out->jumpkind = in->jumpkind;
488   return out;
489}
490
491
492/*---------------------------------------------------------------*/
493/*--- In-place removal of redundant GETs                      ---*/
494/*---------------------------------------------------------------*/
495
496/* Scan forwards, building up an environment binding (min offset, max
497   offset) pairs to values, which will either be temps or constants.
498
499   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
500   env and if it matches, replace the Get with the stored value.  If
501   there is no match, add a (minoff,maxoff) :-> t binding.
502
503   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
504   any binding which fully or partially overlaps with (minoff,maxoff).
505   Then add a new (minoff,maxoff) :-> t or c binding.  */
506
507/* Extract the min/max offsets from a guest state array descriptor. */
508
509inline
510static void getArrayBounds ( IRRegArray* descr,
511                             UInt* minoff, UInt* maxoff )
512{
513   *minoff = descr->base;
514   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
515   vassert((*minoff & ~0xFFFF) == 0);
516   vassert((*maxoff & ~0xFFFF) == 0);
517   vassert(*minoff <= *maxoff);
518}
519
520/* Create keys, of the form ((minoffset << 16) | maxoffset). */
521
522static UInt mk_key_GetPut ( Int offset, IRType ty )
523{
524   /* offset should fit in 16 bits. */
525   UInt minoff = offset;
526   UInt maxoff = minoff + sizeofIRType(ty) - 1;
527   vassert((minoff & ~0xFFFF) == 0);
528   vassert((maxoff & ~0xFFFF) == 0);
529   return (minoff << 16) | maxoff;
530}
531
532static UInt mk_key_GetIPutI ( IRRegArray* descr )
533{
534   UInt minoff, maxoff;
535   getArrayBounds( descr, &minoff, &maxoff );
536   vassert((minoff & ~0xFFFF) == 0);
537   vassert((maxoff & ~0xFFFF) == 0);
538   return (minoff << 16) | maxoff;
539}
540
541/* Supposing h has keys of the form generated by mk_key_GetPut and
542   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
543   .. k_hi).
544*/
545static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
546{
547   Int  j;
548   UInt e_lo, e_hi;
549   vassert(k_lo <= k_hi);
550   /* invalidate any env entries which in any way overlap (k_lo
551      .. k_hi) */
552   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
553
554   for (j = 0; j < h->used; j++) {
555      if (!h->inuse[j])
556         continue;
557      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
558      e_hi = ((UInt)h->key[j]) & 0xFFFF;
559      vassert(e_lo <= e_hi);
560      if (e_hi < k_lo || k_hi < e_lo)
561         continue; /* no overlap possible */
562      else
563         /* overlap; invalidate */
564         h->inuse[j] = False;
565   }
566}
567
568
/* Forward scan of bb, in place.  env maps packed
   ((minoff << 16) | maxoff) keys to IRExpr* values — either a RdTmp
   of a temp known to hold that state slice, or the atomic data of a
   Put to it.  Gets whose range is bound in env are replaced by the
   stored value; Puts and state-writing dirty helpers invalidate
   bindings. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      /* Note: for Ist_Put, 'key' still holds the value computed in
         the invalidation step just above. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
661
662
663/*---------------------------------------------------------------*/
664/*--- In-place removal of redundant PUTs                      ---*/
665/*---------------------------------------------------------------*/
666
667/* Find any Get uses in st and invalidate any partially or fully
668   overlapping ranges listed in env.  Due to the flattening phase, the
669   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
670
/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Also handles implicit reads:
   memory accesses flush env entries for state that must be precise at
   loads/stores (as decided by preciseMemExnsFn, or all of them if
   vex_control.iropt_precise_memory_exns is set), and dirty helpers,
   fences, CAS/LLSC and AbiHints conservatively flush everything. */
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;  /* does st touch memory? */
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;

      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      for (j = 0; j < env->used; j++) {
         if (!env->inuse[j])
            continue;
         if (vex_control.iropt_precise_memory_exns) {
            /* Precise exceptions required.  Flush all guest state. */
            env->inuse[j] = False;
         } else {
            /* Just flush the minimal amount required, as computed by
               preciseMemExnsFn. */
            HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
            HWord k_hi = env->key[j] & 0xFFFF;
            if (preciseMemExnsFn( k_lo, k_hi ))
               env->inuse[j] = False;
         }
      }
   } /* if (memRW) */

}
785
786
787/* Scan backwards, building up a set of (min offset, max
788   offset) pairs, indicating those parts of the guest state
789   for which the next event is a write.
790
791   On seeing a conditional exit, empty the set.
792
793   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
794   completely within the set, remove the Put.  Otherwise, add
795   (minoff,maxoff) to the set.
796
797   On seeing 'Get (minoff,maxoff)', remove any part of the set
798   overlapping (minoff,maxoff).  The same has to happen for any events
799   which implicitly read parts of the guest state: dirty helper calls
800   and loads/stores.
801*/
802
/* Backward scan of bb, in place.  env is used as a set (values are
   all 0) of packed (minoff,maxoff) keys for which the next event, in
   execution order, is known to be a write.  A Put whose exact key is
   already in the set is dead and is NoOp'd out.  Conditional exits
   empty the set; Gets and implicit reads remove entries via
   handle_gets_Stmt. */
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   HashHW* env = newHHW();
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         /* Since control may not get beyond this point, we must empty
            out the set, since we can no longer claim that the next
            event for any part of the guest state is definitely a
            write. */
         vassert(isIRAtom(st->Ist.Exit.guard));
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
            vassert(isIRAtom(st->Ist.PutI.ix));
            vassert(isIRAtom(st->Ist.PutI.data));
            break;
         default:
            isPut = False;
      }
      /* Note the PutI exclusion: PutIs are never removed here; they
         fall through to handle_gets_Stmt below instead. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}
879
880
881/*---------------------------------------------------------------*/
882/*--- Constant propagation and folding                        ---*/
883/*---------------------------------------------------------------*/
884
885/* The env in this section is a map from IRTemp to IRExpr*,
886   that is, an array indexed by IRTemp. */
887
888/* Are both expressions simply the same IRTemp ? */
889static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
890{
891   return toBool( e1->tag == Iex_RdTmp
892                  && e2->tag == Iex_RdTmp
893                  && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
894}
895
896static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
897{
898   return toBool( e1->tag == Iex_Const
899                  && e2->tag == Iex_Const
900                  && e1->Iex.Const.con->tag == Ico_U32
901                  && e2->Iex.Const.con->tag == Ico_U32
902                  && e1->Iex.Const.con->Ico.U32
903                     == e2->Iex.Const.con->Ico.U32 );
904}
905
906/* Are both expressions either the same IRTemp or IRConst-U32s ?  If
907   in doubt, say No. */
908static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
909{
910   switch (e1->tag) {
911      case Iex_RdTmp:
912         return sameIRTemps(e1, e2);
913      case Iex_Const:
914         return sameIcoU32s(e1, e2);
915      default:
916         return False;
917   }
918}
919
920/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
921static Bool isZeroU32 ( IRExpr* e )
922{
923   return toBool( e->tag == Iex_Const
924                  && e->Iex.Const.con->tag == Ico_U32
925                  && e->Iex.Const.con->Ico.U32 == 0);
926}
927
928/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
929static Bool isZeroU64 ( IRExpr* e )
930{
931   return toBool( e->tag == Iex_Const
932                  && e->Iex.Const.con->tag == Ico_U64
933                  && e->Iex.Const.con->Ico.U64 == 0);
934}
935
936static Bool notBool ( Bool b )
937{
938   if (b == True) return False;
939   if (b == False) return True;
940   vpanic("notBool");
941}
942
943/* Make a zero which has the same type as the result of the given
944   primop. */
945static IRExpr* mkZeroOfPrimopResultType ( IROp op )
946{
947   switch (op) {
948      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
949      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
950      case Iop_Sub32:
951      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
952      case Iop_Sub64:
953      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
954      case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
955      default: vpanic("mkZeroOfPrimopResultType: bad primop");
956   }
957}
958
959/* Make a value containing all 1-bits, which has the same type as the
960   result of the given primop. */
961static IRExpr* mkOnesOfPrimopResultType ( IROp op )
962{
963   switch (op) {
964      case Iop_CmpEQ64:
965         return IRExpr_Const(IRConst_U1(toBool(1)));
966      case Iop_CmpEQ8x8:
967         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
968      case Iop_CmpEQ8x16:
969         return IRExpr_Const(IRConst_V128(0xFFFF));
970      default:
971         vpanic("mkOnesOfPrimopResultType: bad primop");
972   }
973}
974
975/* Helpers for folding Clz32/64. */
976static UInt fold_Clz64 ( ULong value )
977{
978   UInt i;
979   vassert(value != 0ULL); /* no defined semantics for arg==0 */
980   for (i = 0; i < 64; ++i) {
981      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
982   }
983   vassert(0);
984   /*NOTREACHED*/
985   return 0;
986}
987
988static UInt fold_Clz32 ( UInt value )
989{
990   UInt i;
991   vassert(value != 0); /* no defined semantics for arg==0 */
992   for (i = 0; i < 32; ++i) {
993      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
994   }
995   vassert(0);
996   /*NOTREACHED*/
997   return 0;
998}
999
1000
1001static IRExpr* fold_Expr ( IRExpr* e )
1002{
1003   Int     shift;
1004   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1005
1006   /* UNARY ops */
1007   if (e->tag == Iex_Unop
1008       && e->Iex.Unop.arg->tag == Iex_Const) {
1009      switch (e->Iex.Unop.op) {
1010         case Iop_1Uto8:
1011            e2 = IRExpr_Const(IRConst_U8(toUChar(
1012                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1013                    ? 1 : 0)));
1014            break;
1015         case Iop_1Uto32:
1016            e2 = IRExpr_Const(IRConst_U32(
1017                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1018                    ? 1 : 0));
1019            break;
1020         case Iop_1Uto64:
1021            e2 = IRExpr_Const(IRConst_U64(
1022                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1023                    ? 1 : 0));
1024            break;
1025
1026         case Iop_1Sto8:
1027            e2 = IRExpr_Const(IRConst_U8(toUChar(
1028                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1029                    ? 0xFF : 0)));
1030            break;
1031         case Iop_1Sto16:
1032            e2 = IRExpr_Const(IRConst_U16(toUShort(
1033                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1034                    ? 0xFFFF : 0)));
1035            break;
1036         case Iop_1Sto32:
1037            e2 = IRExpr_Const(IRConst_U32(
1038                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1039                    ? 0xFFFFFFFF : 0));
1040            break;
1041         case Iop_1Sto64:
1042            e2 = IRExpr_Const(IRConst_U64(
1043                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1044                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
1045            break;
1046
1047         case Iop_8Sto32: {
1048            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1049            s32 <<= 24;
1050            s32 >>= 24;
1051            e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1052            break;
1053         }
1054         case Iop_16Sto32: {
1055            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1056            s32 <<= 16;
1057            s32 >>= 16;
1058            e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
1059            break;
1060         }
1061         case Iop_8Uto64:
1062            e2 = IRExpr_Const(IRConst_U64(
1063                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1064            break;
1065         case Iop_16Uto64:
1066            e2 = IRExpr_Const(IRConst_U64(
1067                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1068            break;
1069         case Iop_8Uto32:
1070            e2 = IRExpr_Const(IRConst_U32(
1071                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1072            break;
1073         case Iop_8Sto16: {
1074            /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1075            s16 <<= 8;
1076            s16 >>= 8;
1077            e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
1078            break;
1079         }
1080         case Iop_8Uto16:
1081            e2 = IRExpr_Const(IRConst_U16(
1082                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1083            break;
1084         case Iop_16Uto32:
1085            e2 = IRExpr_Const(IRConst_U32(
1086                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1087            break;
1088         case Iop_32to16:
1089            e2 = IRExpr_Const(IRConst_U16(toUShort(
1090                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1091            break;
1092         case Iop_32to8:
1093            e2 = IRExpr_Const(IRConst_U8(toUChar(
1094                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1095            break;
1096         case Iop_32to1:
1097            e2 = IRExpr_Const(IRConst_U1(toBool(
1098                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1099                 )));
1100            break;
1101         case Iop_64to1:
1102            e2 = IRExpr_Const(IRConst_U1(toBool(
1103                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1104                 )));
1105            break;
1106
1107         case Iop_Not64:
1108            e2 = IRExpr_Const(IRConst_U64(
1109                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1110            break;
1111         case Iop_Not32:
1112            e2 = IRExpr_Const(IRConst_U32(
1113                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1114            break;
1115         case Iop_Not16:
1116            e2 = IRExpr_Const(IRConst_U16(toUShort(
1117                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1118            break;
1119         case Iop_Not8:
1120            e2 = IRExpr_Const(IRConst_U8(toUChar(
1121                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1122            break;
1123
1124         case Iop_Not1:
1125            e2 = IRExpr_Const(IRConst_U1(
1126                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1127            break;
1128
1129         case Iop_64to8: {
1130            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1131            w64 &= 0xFFULL;
1132            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1133            break;
1134         }
1135         case Iop_64to16: {
1136            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1137            w64 &= 0xFFFFULL;
1138            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1139            break;
1140         }
1141         case Iop_64to32: {
1142            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1143            w64 &= 0x00000000FFFFFFFFULL;
1144            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1145            break;
1146         }
1147         case Iop_64HIto32: {
1148            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1149            w64 >>= 32;
1150            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1151            break;
1152         }
1153         case Iop_32Uto64:
1154            e2 = IRExpr_Const(IRConst_U64(
1155                    0xFFFFFFFFULL
1156                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1157            break;
1158         case Iop_16Sto64: {
1159            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1160            s64 <<= 48;
1161            s64 >>= 48;
1162            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1163            break;
1164         }
1165         case Iop_32Sto64: {
1166            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1167            s64 <<= 32;
1168            s64 >>= 32;
1169            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1170            break;
1171         }
1172
1173         case Iop_16to8: {
1174            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1175            w16 &= 0xFF;
1176            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1177            break;
1178         }
1179         case Iop_16HIto8: {
1180            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1181            w16 >>= 8;
1182            w16 &= 0xFF;
1183            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1184            break;
1185         }
1186
1187         case Iop_CmpNEZ8:
1188            e2 = IRExpr_Const(IRConst_U1(toBool(
1189                    0 !=
1190                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1191                 )));
1192            break;
1193         case Iop_CmpNEZ32:
1194            e2 = IRExpr_Const(IRConst_U1(toBool(
1195                    0 !=
1196                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1197                 )));
1198            break;
1199         case Iop_CmpNEZ64:
1200            e2 = IRExpr_Const(IRConst_U1(toBool(
1201                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1202                 )));
1203            break;
1204
1205         case Iop_CmpwNEZ32: {
1206            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1207            if (w32 == 0)
1208               e2 = IRExpr_Const(IRConst_U32( 0 ));
1209            else
1210               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1211            break;
1212         }
1213         case Iop_CmpwNEZ64: {
1214            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1215            if (w64 == 0)
1216               e2 = IRExpr_Const(IRConst_U64( 0 ));
1217            else
1218               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1219            break;
1220         }
1221
1222         case Iop_Left32: {
1223            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1224            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
1225            s32 = (s32 | (-s32));
1226            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1227            break;
1228         }
1229
1230         case Iop_Left64: {
1231            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1232            Long  s64 = (Long)u64;
1233            s64 = (s64 | (-s64));
1234            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1235            break;
1236         }
1237
1238         case Iop_Clz32: {
1239            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1240            if (u32 != 0)
1241               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1242            break;
1243         }
1244         case Iop_Clz64: {
1245            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1246            if (u64 != 0ULL)
1247               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1248            break;
1249         }
1250
1251         default:
1252            goto unhandled;
1253      }
1254   }
1255
1256   /* BINARY ops */
1257   if (e->tag == Iex_Binop) {
1258      if (e->Iex.Binop.arg1->tag == Iex_Const
1259          && e->Iex.Binop.arg2->tag == Iex_Const) {
1260         /* cases where both args are consts */
1261         switch (e->Iex.Binop.op) {
1262
1263            /* -- Or -- */
1264            case Iop_Or8:
1265               e2 = IRExpr_Const(IRConst_U8(toUChar(
1266                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1267                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1268               break;
1269            case Iop_Or16:
1270               e2 = IRExpr_Const(IRConst_U16(toUShort(
1271                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1272                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1273               break;
1274            case Iop_Or32:
1275               e2 = IRExpr_Const(IRConst_U32(
1276                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1277                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1278               break;
1279            case Iop_Or64:
1280               e2 = IRExpr_Const(IRConst_U64(
1281                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1282                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1283               break;
1284
1285            /* -- Xor -- */
1286            case Iop_Xor8:
1287               e2 = IRExpr_Const(IRConst_U8(toUChar(
1288                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1289                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1290               break;
1291            case Iop_Xor16:
1292               e2 = IRExpr_Const(IRConst_U16(toUShort(
1293                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1294                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1295               break;
1296            case Iop_Xor32:
1297               e2 = IRExpr_Const(IRConst_U32(
1298                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1299                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1300               break;
1301            case Iop_Xor64:
1302               e2 = IRExpr_Const(IRConst_U64(
1303                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1304                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1305               break;
1306
1307            /* -- And -- */
1308            case Iop_And8:
1309               e2 = IRExpr_Const(IRConst_U8(toUChar(
1310                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1311                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1312               break;
1313            case Iop_And16:
1314               e2 = IRExpr_Const(IRConst_U16(toUShort(
1315                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1316                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1317               break;
1318            case Iop_And32:
1319               e2 = IRExpr_Const(IRConst_U32(
1320                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1321                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1322               break;
1323            case Iop_And64:
1324               e2 = IRExpr_Const(IRConst_U64(
1325                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1326                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1327               break;
1328
1329            /* -- Add -- */
1330            case Iop_Add8:
1331               e2 = IRExpr_Const(IRConst_U8(toUChar(
1332                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1333                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1334               break;
1335            case Iop_Add32:
1336               e2 = IRExpr_Const(IRConst_U32(
1337                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1338                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1339               break;
1340            case Iop_Add64:
1341               e2 = IRExpr_Const(IRConst_U64(
1342                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1343                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1344               break;
1345
1346            /* -- Sub -- */
1347            case Iop_Sub8:
1348               e2 = IRExpr_Const(IRConst_U8(toUChar(
1349                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1350                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1351               break;
1352            case Iop_Sub32:
1353               e2 = IRExpr_Const(IRConst_U32(
1354                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1355                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1356               break;
1357            case Iop_Sub64:
1358               e2 = IRExpr_Const(IRConst_U64(
1359                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1360                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1361               break;
1362
1363            /* -- Max32U -- */
1364            case Iop_Max32U: {
1365               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1366               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1367               UInt res  = u32a > u32b ? u32a : u32b;
1368               e2 = IRExpr_Const(IRConst_U32(res));
1369               break;
1370            }
1371
1372            /* -- Mul -- */
1373            case Iop_Mul32:
1374               e2 = IRExpr_Const(IRConst_U32(
1375                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1376                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1377               break;
1378            case Iop_Mul64:
1379               e2 = IRExpr_Const(IRConst_U64(
1380                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1381                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1382               break;
1383
1384            case Iop_MullS32: {
1385               /* very paranoid */
1386               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1387               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1388               Int   s32a = (Int)u32a;
1389               Int   s32b = (Int)u32b;
1390               Long  s64a = (Long)s32a;
1391               Long  s64b = (Long)s32b;
1392               Long  sres = s64a * s64b;
1393               ULong ures = (ULong)sres;
1394               e2 = IRExpr_Const(IRConst_U64(ures));
1395               break;
1396            }
1397
1398            /* -- Shl -- */
1399            case Iop_Shl32:
1400               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1401               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1402               if (shift >= 0 && shift <= 31)
1403                  e2 = IRExpr_Const(IRConst_U32(
1404                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1405                           << shift)));
1406               break;
1407            case Iop_Shl64:
1408               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1409               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1410               if (shift >= 0 && shift <= 63)
1411                  e2 = IRExpr_Const(IRConst_U64(
1412                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1413                           << shift)));
1414               break;
1415
1416            /* -- Sar -- */
1417            case Iop_Sar32: {
1418               /* paranoid ... */
1419               /*signed*/ Int s32;
1420               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1421               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1422               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1423               if (shift >= 0 && shift <= 31) {
1424                  s32 >>=/*signed*/ shift;
1425                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1426               }
1427               break;
1428            }
1429            case Iop_Sar64: {
1430               /* paranoid ... */
1431               /*signed*/ Long s64;
1432               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1433               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1434               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1435               if (shift >= 0 && shift <= 63) {
1436                  s64 >>=/*signed*/ shift;
1437                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1438               }
1439               break;
1440            }
1441
1442            /* -- Shr -- */
1443            case Iop_Shr32: {
1444               /* paranoid ... */
1445               /*unsigned*/ UInt u32;
1446               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1447               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1448               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1449               if (shift >= 0 && shift <= 31) {
1450                  u32 >>=/*unsigned*/ shift;
1451                  e2 = IRExpr_Const(IRConst_U32(u32));
1452               }
1453               break;
1454            }
1455            case Iop_Shr64: {
1456               /* paranoid ... */
1457               /*unsigned*/ ULong u64;
1458               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1459               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1460               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1461               if (shift >= 0 && shift <= 63) {
1462                  u64 >>=/*unsigned*/ shift;
1463                  e2 = IRExpr_Const(IRConst_U64(u64));
1464               }
1465               break;
1466            }
1467
1468            /* -- CmpEQ -- */
1469            case Iop_CmpEQ32:
1470               e2 = IRExpr_Const(IRConst_U1(toBool(
1471                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1472                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1473               break;
1474            case Iop_CmpEQ64:
1475               e2 = IRExpr_Const(IRConst_U1(toBool(
1476                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1477                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1478               break;
1479
1480            /* -- CmpNE -- */
1481            case Iop_CmpNE8:
1482               e2 = IRExpr_Const(IRConst_U1(toBool(
1483                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1484                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1485               break;
1486            case Iop_CmpNE32:
1487               e2 = IRExpr_Const(IRConst_U1(toBool(
1488                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1489                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1490               break;
1491            case Iop_CmpNE64:
1492               e2 = IRExpr_Const(IRConst_U1(toBool(
1493                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1494                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1495               break;
1496
1497            /* -- CmpLEU -- */
1498            case Iop_CmpLE32U:
1499               e2 = IRExpr_Const(IRConst_U1(toBool(
1500                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1501                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1502               break;
1503            case Iop_CmpLE64U:
1504               e2 = IRExpr_Const(IRConst_U1(toBool(
1505                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1506                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1507               break;
1508
1509            /* -- CmpLES -- */
1510            case Iop_CmpLE32S:
1511               e2 = IRExpr_Const(IRConst_U1(toBool(
1512                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1513                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1514               break;
1515            case Iop_CmpLE64S:
1516               e2 = IRExpr_Const(IRConst_U1(toBool(
1517                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1518                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1519               break;
1520
1521            /* -- CmpLTS -- */
1522            case Iop_CmpLT32S:
1523               e2 = IRExpr_Const(IRConst_U1(toBool(
1524                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1525                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1526               break;
1527            case Iop_CmpLT64S:
1528               e2 = IRExpr_Const(IRConst_U1(toBool(
1529                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1530                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1531               break;
1532
1533            /* -- CmpLTU -- */
1534            case Iop_CmpLT32U:
1535               e2 = IRExpr_Const(IRConst_U1(toBool(
1536                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1537                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1538               break;
1539            case Iop_CmpLT64U:
1540               e2 = IRExpr_Const(IRConst_U1(toBool(
1541                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1542                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1543               break;
1544
1545            /* -- CmpORD -- */
1546            case Iop_CmpORD32S: {
1547               /* very paranoid */
1548               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1549               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1550               Int   s32a = (Int)u32a;
1551               Int   s32b = (Int)u32b;
1552               Int   r = 0x2; /* EQ */
1553               if (s32a < s32b) {
1554                  r = 0x8; /* LT */
1555               }
1556               else if (s32a > s32b) {
1557                  r = 0x4; /* GT */
1558               }
1559               e2 = IRExpr_Const(IRConst_U32(r));
1560               break;
1561            }
1562
1563            /* -- nHLto2n -- */
1564            case Iop_32HLto64:
1565               e2 = IRExpr_Const(IRConst_U64(
1566                       (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
1567                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
1568                    ));
1569               break;
1570            case Iop_64HLto128:
1571               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
1573                  handle it, so as to stop getting blasted with
1574                  no-rule-for-this-primop messages. */
1575               break;
1576
1577            default:
1578               goto unhandled;
1579         }
1580
1581      } else {
1582
1583         /* other cases (identities, etc) */
1584
1585         /* Shl64/Shr64(x,0) ==> x */
1586         if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
1587             && e->Iex.Binop.arg2->tag == Iex_Const
1588             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1589            e2 = e->Iex.Binop.arg1;
1590         } else
1591
1592         /* Shl32/Shr32(x,0) ==> x */
1593         if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
1594             && e->Iex.Binop.arg2->tag == Iex_Const
1595             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1596            e2 = e->Iex.Binop.arg1;
1597         } else
1598
1599         /* Or8(x,0) ==> x */
1600         if ((e->Iex.Binop.op == Iop_Or8)
1601             && e->Iex.Binop.arg2->tag == Iex_Const
1602             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
1603            e2 = e->Iex.Binop.arg1;
1604         } else
1605
1606         /* Or16(x,0) ==> x */
1607         if ((e->Iex.Binop.op == Iop_Or16)
1608             && e->Iex.Binop.arg2->tag == Iex_Const
1609             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
1610            e2 = e->Iex.Binop.arg1;
1611         } else
1612
1613         /* Or32/Add32/Max32U(x,0) ==> x
1614            Or32/Add32/Max32U(0,x) ==> x */
1615         if (e->Iex.Binop.op == Iop_Add32
1616             || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U) {
1617            if (isZeroU32(e->Iex.Binop.arg2))
1618               e2 = e->Iex.Binop.arg1;
1619            else if (isZeroU32(e->Iex.Binop.arg1))
1620               e2 = e->Iex.Binop.arg2;
1621         } else
1622
1623         /* Sub64(x,0) ==> x */
1624         if (e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg2)) {
1625            e2 = e->Iex.Binop.arg1;
1626         } else
1627
1628         /* Add32(t,t) ==> t << 1.  Memcheck doesn't understand that
1629            x+x produces a defined least significant bit, and it seems
1630            simplest just to get rid of the problem by rewriting it
1631            out, since the opportunity to do so exists. */
1632         if (e->Iex.Binop.op == Iop_Add32
1633             && e->Iex.Binop.arg1->tag == Iex_RdTmp
1634             && e->Iex.Binop.arg2->tag == Iex_RdTmp
1635             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1636                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1637            e2 = IRExpr_Binop(Iop_Shl32,
1638                              e->Iex.Binop.arg1,
1639                              IRExpr_Const(IRConst_U8(1)));
1640         } else
1641
1642         /* Add64(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
1643         if (e->Iex.Binop.op == Iop_Add64
1644             && e->Iex.Binop.arg1->tag == Iex_RdTmp
1645             && e->Iex.Binop.arg2->tag == Iex_RdTmp
1646             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1647                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1648            e2 = IRExpr_Binop(Iop_Shl64,
1649                              e->Iex.Binop.arg1,
1650                              IRExpr_Const(IRConst_U8(1)));
1651         } else
1652
1653         /* Add8(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
1654         if (e->Iex.Binop.op == Iop_Add8
1655             && e->Iex.Binop.arg1->tag == Iex_RdTmp
1656             && e->Iex.Binop.arg2->tag == Iex_RdTmp
1657             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
1658                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
1659            e2 = IRExpr_Binop(Iop_Shl8,
1660                              e->Iex.Binop.arg1,
1661                              IRExpr_Const(IRConst_U8(1)));
1662         } else
1663         /* NB no Add16(t,t) case yet as no known test case exists */
1664
1665         /* Or64/Add64(x,0) ==> x
1666            Or64/Add64(0,x) ==> x */
1667         if (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64) {
1668            if (isZeroU64(e->Iex.Binop.arg2))
1669               e2 = e->Iex.Binop.arg1;
1670            else if (isZeroU64(e->Iex.Binop.arg1))
1671               e2 = e->Iex.Binop.arg2;
1672         } else
1673
1674         /* And32(x,0xFFFFFFFF) ==> x */
1675         if (e->Iex.Binop.op == Iop_And32
1676             && e->Iex.Binop.arg2->tag == Iex_Const
1677             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
1678            e2 = e->Iex.Binop.arg1;
1679         } else
1680
1681         /* And32(x,0) ==> 0 */
1682         if (e->Iex.Binop.op == Iop_And32
1683             && e->Iex.Binop.arg2->tag == Iex_Const
1684             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
1685            e2 = IRExpr_Const(IRConst_U32(0));
1686         } else
1687
1688         /* And32/Shl32(0,x) ==> 0 */
1689         if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
1690             && e->Iex.Binop.arg1->tag == Iex_Const
1691             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
1692            e2 = IRExpr_Const(IRConst_U32(0));
1693         } else
1694
1695         /* Or8(0,x) ==> x */
1696         if (e->Iex.Binop.op == Iop_Or8
1697             && e->Iex.Binop.arg1->tag == Iex_Const
1698             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
1699            e2 = e->Iex.Binop.arg2;
1700         } else
1701
1702         /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
1703         /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
1704         /* Max32U(t,t) ==> t, for some IRTemp t */
1705         switch (e->Iex.Binop.op) {
1706            case Iop_And64: case Iop_And32:
1707            case Iop_And16: case Iop_And8:
1708            case Iop_Or64: case Iop_Or32:
1709            case Iop_Or16: case Iop_Or8: case Iop_OrV128:
1710            case Iop_Max32U:
1711               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
1712                  e2 = e->Iex.Binop.arg1;
1713               break;
1714            default:
1715               break;
1716         }
1717
1718         /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
1719         /* Sub32/64(t,t) ==> 0, for some IRTemp t */
1720         switch (e->Iex.Binop.op) {
1721            case Iop_Xor64: case Iop_Xor32:
1722            case Iop_Xor16: case Iop_Xor8:
1723            case Iop_XorV128:
1724            case Iop_Sub64: case Iop_Sub32:
1725               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
1726                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
1727               break;
1728            default:
1729               break;
1730         }
1731
1732         switch (e->Iex.Binop.op) {
1733            case Iop_CmpEQ64:
1734            case Iop_CmpEQ8x8:
1735            case Iop_CmpEQ8x16:
1736               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
1737                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
1738               break;
1739            default:
1740               break;
1741         }
1742
1743      }
1744   }
1745
1746   /* Mux0X */
1747   if (e->tag == Iex_Mux0X) {
1748      /* is the discriminant is a constant? */
1749      if (e->Iex.Mux0X.cond->tag == Iex_Const) {
1750         Bool zero;
1751         /* assured us by the IR type rules */
1752         vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
1753         zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
1754                                     ->Iex.Const.con->Ico.U8));
1755         e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
1756      }
1757      else
1758      /* are the arms identical? (pretty weedy test) */
1759      if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
1760                               e->Iex.Mux0X.exprX)) {
1761         e2 = e->Iex.Mux0X.expr0;
1762      }
1763   }
1764
1765   /* Show cases where we've found but not folded 'op(t,t)'. */
1766   if (0 && e == e2 && e->tag == Iex_Binop
1767       && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
1768      vex_printf("IDENT: ");
1769      ppIRExpr(e); vex_printf("\n");
1770   }
1771
1772   /* Show the overall results of folding. */
1773   if (DEBUG_IROPT && e2 != e) {
1774      vex_printf("FOLD: ");
1775      ppIRExpr(e); vex_printf("  ->  ");
1776      ppIRExpr(e2); vex_printf("\n");
1777   }
1778
1779   return e2;
1780
1781 unhandled:
1782#  if 0
1783   vex_printf("\n\n");
1784   ppIRExpr(e);
1785   vpanic("fold_Expr: no rule for the above");
1786#  else
1787   if (vex_control.iropt_verbosity > 0) {
1788      vex_printf("vex iropt: fold_Expr: no rule for: ");
1789      ppIRExpr(e);
1790      vex_printf("\n");
1791   }
1792   return e2;
1793#  endif
1794}
1795
1796
1797/* Apply the subst to a simple 1-level expression -- guaranteed to be
1798   1-level due to previous flattening pass. */
1799
1800static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
1801{
1802   switch (ex->tag) {
1803      case Iex_RdTmp:
1804         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
1805            return env[(Int)ex->Iex.RdTmp.tmp];
1806         } else {
1807            /* not bound in env */
1808            return ex;
1809         }
1810
1811      case Iex_Const:
1812      case Iex_Get:
1813         return ex;
1814
1815      case Iex_GetI:
1816         vassert(isIRAtom(ex->Iex.GetI.ix));
1817         return IRExpr_GetI(
1818            ex->Iex.GetI.descr,
1819            subst_Expr(env, ex->Iex.GetI.ix),
1820            ex->Iex.GetI.bias
1821         );
1822
1823      case Iex_Qop:
1824         vassert(isIRAtom(ex->Iex.Qop.arg1));
1825         vassert(isIRAtom(ex->Iex.Qop.arg2));
1826         vassert(isIRAtom(ex->Iex.Qop.arg3));
1827         vassert(isIRAtom(ex->Iex.Qop.arg4));
1828         return IRExpr_Qop(
1829                   ex->Iex.Qop.op,
1830                   subst_Expr(env, ex->Iex.Qop.arg1),
1831                   subst_Expr(env, ex->Iex.Qop.arg2),
1832                   subst_Expr(env, ex->Iex.Qop.arg3),
1833                   subst_Expr(env, ex->Iex.Qop.arg4)
1834                );
1835
1836      case Iex_Triop:
1837         vassert(isIRAtom(ex->Iex.Triop.arg1));
1838         vassert(isIRAtom(ex->Iex.Triop.arg2));
1839         vassert(isIRAtom(ex->Iex.Triop.arg3));
1840         return IRExpr_Triop(
1841                   ex->Iex.Triop.op,
1842                   subst_Expr(env, ex->Iex.Triop.arg1),
1843                   subst_Expr(env, ex->Iex.Triop.arg2),
1844                   subst_Expr(env, ex->Iex.Triop.arg3)
1845                );
1846
1847      case Iex_Binop:
1848         vassert(isIRAtom(ex->Iex.Binop.arg1));
1849         vassert(isIRAtom(ex->Iex.Binop.arg2));
1850         return IRExpr_Binop(
1851                   ex->Iex.Binop.op,
1852                   subst_Expr(env, ex->Iex.Binop.arg1),
1853                   subst_Expr(env, ex->Iex.Binop.arg2)
1854                );
1855
1856      case Iex_Unop:
1857         vassert(isIRAtom(ex->Iex.Unop.arg));
1858         return IRExpr_Unop(
1859                   ex->Iex.Unop.op,
1860                   subst_Expr(env, ex->Iex.Unop.arg)
1861                );
1862
1863      case Iex_Load:
1864         vassert(isIRAtom(ex->Iex.Load.addr));
1865         return IRExpr_Load(
1866                   ex->Iex.Load.end,
1867                   ex->Iex.Load.ty,
1868                   subst_Expr(env, ex->Iex.Load.addr)
1869                );
1870
1871      case Iex_CCall: {
1872         Int      i;
1873         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
1874         for (i = 0; args2[i]; i++) {
1875            vassert(isIRAtom(args2[i]));
1876            args2[i] = subst_Expr(env, args2[i]);
1877         }
1878         return IRExpr_CCall(
1879                   ex->Iex.CCall.cee,
1880                   ex->Iex.CCall.retty,
1881                   args2
1882                );
1883      }
1884
1885      case Iex_Mux0X:
1886         vassert(isIRAtom(ex->Iex.Mux0X.cond));
1887         vassert(isIRAtom(ex->Iex.Mux0X.expr0));
1888         vassert(isIRAtom(ex->Iex.Mux0X.exprX));
1889         return IRExpr_Mux0X(
1890                   subst_Expr(env, ex->Iex.Mux0X.cond),
1891                   subst_Expr(env, ex->Iex.Mux0X.expr0),
1892                   subst_Expr(env, ex->Iex.Mux0X.exprX)
1893                );
1894
1895      default:
1896         vex_printf("\n\n"); ppIRExpr(ex);
1897         vpanic("subst_Expr");
1898
1899   }
1900}
1901
1902
1903/* Apply the subst to stmt, then fold the result as much as possible.
1904   Much simplified due to stmt being previously flattened.  As a
1905   result of this, the stmt may wind up being turned into a no-op.
1906*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   /* For each statement kind, every embedded expression is run
      through subst_Expr (to apply the tmp->atom environment) and then
      fold_Expr (constant folding).  A new statement is built; 'st'
      itself is not modified. */
   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         return IRStmt_PutI(
                   st->Ist.PutI.descr,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
                   st->Ist.PutI.bias,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.data))
                );

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_CAS: {
         /* expdHi/dataHi may be NULL (single-word CAS); only
            substitute/fold them when present. */
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         /* storedata == NULL means this is a load-linked, not a
            store-conditional. */
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         /* Shallow-copy the IRDirty and its arg vector, then rewrite
            mAddr (if the helper touches memory), the guard, and each
            argument in the copy. */
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            vassert(isIRAtom(d2->args[i]));
            d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            vassert(fcond->Iex.Const.con->Ico.U1 == False
                    || fcond->Iex.Const.con->Ico.U1 == True);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
2054
2055
/* Constant/copy propagation over one (flattened, SSA-form) IRSB.
   Builds and returns a new IRSB; 'in' is not modified.  Bindings of
   the form 't = const' and 't1 = t2' are absorbed into 'env' and
   dropped from the output; all other statements are emitted with the
   accumulated substitution applied and folded. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   /* env is indexed directly by IRTemp number; entry is NULL when the
      tmp has no binding, else the atom (tmp or const) it maps to. */
   IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env with which travels forward.  This holds a
      substitution, mapping IRTemps to atoms, that is, IRExprs which
      are either IRTemps or IRConsts.  Thus, copy and constant
      propagation is done.  The environment is to be applied as we
      move along.  Keys are IRTemps.  Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* If the statement has been folded into a no-op, forget it. */
      if (st2->tag == Ist_NoOp) continue;

      /* Now consider what the stmt looks like.  If it's of the form
         't = const' or 't1 = t2', add it to the running environment
         and not to the output BB.  Otherwise, add it to the output
         BB.  Note, we choose not to propagate const when const is an
         F64i, so that F64i literals can be CSE'd later.  This helps
         x86 floating point code generation. */

      if (st2->tag == Ist_WrTmp
          && st2->Ist.WrTmp.data->tag == Iex_Const
          && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
         /* 't = const' -- add to env.
             The pair (IRTemp, IRExpr*) is added. */
         env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
      }
      else
      if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
         /* 't1 = t2' -- add to env.
             The pair (IRTemp, IRExpr*) is added. */
         env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
      }
      else {
         /* Not interesting, copy st2 into the output block. */
         addStmtToIRSB( out, st2 );
      }
   }

   /* The block's next-address expression also gets the substitution,
      though (NB) not folding. */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   return out;
}
2124
2125
2126/*---------------------------------------------------------------*/
2127/*--- Dead code (t = E) removal                               ---*/
2128/*---------------------------------------------------------------*/
2129
2130/* As a side effect, also removes all code following an unconditional
2131   side exit. */
2132
2133/* The type of the HashHW map is: a map from IRTemp to nothing
2134   -- really just operating a set or IRTemps.
2135*/
2136
2137inline
2138static void addUses_Temp ( Bool* set, IRTemp tmp )
2139{
2140   set[(Int)tmp] = True;
2141}
2142
2143static void addUses_Expr ( Bool* set, IRExpr* e )
2144{
2145   Int i;
2146   switch (e->tag) {
2147      case Iex_GetI:
2148         addUses_Expr(set, e->Iex.GetI.ix);
2149         return;
2150      case Iex_Mux0X:
2151         addUses_Expr(set, e->Iex.Mux0X.cond);
2152         addUses_Expr(set, e->Iex.Mux0X.expr0);
2153         addUses_Expr(set, e->Iex.Mux0X.exprX);
2154         return;
2155      case Iex_CCall:
2156         for (i = 0; e->Iex.CCall.args[i]; i++)
2157            addUses_Expr(set, e->Iex.CCall.args[i]);
2158         return;
2159      case Iex_Load:
2160         addUses_Expr(set, e->Iex.Load.addr);
2161         return;
2162      case Iex_Qop:
2163         addUses_Expr(set, e->Iex.Qop.arg1);
2164         addUses_Expr(set, e->Iex.Qop.arg2);
2165         addUses_Expr(set, e->Iex.Qop.arg3);
2166         addUses_Expr(set, e->Iex.Qop.arg4);
2167         return;
2168      case Iex_Triop:
2169         addUses_Expr(set, e->Iex.Triop.arg1);
2170         addUses_Expr(set, e->Iex.Triop.arg2);
2171         addUses_Expr(set, e->Iex.Triop.arg3);
2172         return;
2173      case Iex_Binop:
2174         addUses_Expr(set, e->Iex.Binop.arg1);
2175         addUses_Expr(set, e->Iex.Binop.arg2);
2176         return;
2177      case Iex_Unop:
2178         addUses_Expr(set, e->Iex.Unop.arg);
2179         return;
2180      case Iex_RdTmp:
2181         addUses_Temp(set, e->Iex.RdTmp.tmp);
2182         return;
2183      case Iex_Const:
2184      case Iex_Get:
2185         return;
2186      default:
2187         vex_printf("\n");
2188         ppIRExpr(e);
2189         vpanic("addUses_Expr");
2190   }
2191}
2192
/* Mark, in 'set', every IRTemp read by statement 'st' (via
   addUses_Expr on each embedded expression).  The tmp a WrTmp
   *writes* is deliberately not marked -- only uses count. */
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         return;
      case Ist_PutI:
         addUses_Expr(set, st->Ist.PutI.ix);
         addUses_Expr(set, st->Ist.PutI.data);
         return;
      case Ist_WrTmp:
         /* Only the rhs is a use; the lhs tmp is a definition. */
         addUses_Expr(set, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         return;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         return;
      case Ist_CAS:
         /* expdHi/dataHi may be NULL for a single-word CAS. */
         cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         return;
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         addUses_Expr(set, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         /* mAddr only exists when the helper touches memory. */
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (i = 0; d->args[i] != NULL; i++)
            addUses_Expr(set, d->args[i]);
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These carry no expressions, hence no tmp uses. */
         return;
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}
2253
2254
2255/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
2256static Bool isZeroU1 ( IRExpr* e )
2257{
2258   return toBool( e->tag == Iex_Const
2259                  && e->Iex.Const.con->tag == Ico_U1
2260                  && e->Iex.Const.con->Ico.U1 == False );
2261}
2262
2263/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
2264static Bool isOneU1 ( IRExpr* e )
2265{
2266   return toBool( e->tag == Iex_Const
2267                  && e->Iex.Const.con->tag == Ico_U1
2268                  && e->Iex.Const.con->Ico.U1 == True );
2269}
2270
2271
2272/* Note, this destructively modifies the given IRSB. */
2273
2274/* Scan backwards through statements, carrying a set of IRTemps which
2275   are known to be used after the current point.  On encountering 't =
2276   E', delete the binding if it is not used.  Otherwise, add any temp
2277   uses to the set and keep on moving backwards.
2278
2279   As an enhancement, the first (backwards) pass searches for IR exits
2280   with always-taken conditions and notes the location of the earliest
2281   one in the block.  If any such are found, a second pass copies the
2282   exit destination and jump kind to the bb-end.  Then, the exit and
2283   all statements following it are turned into no-ops.
2284*/
2285
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   /* set[t] == True iff temp t is used at some later point. */
   Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits; the loop runs backwards
         so the last value written is the EARLIEST such exit. */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it.  NB: its
             rhs's uses are NOT added to the set, so chains of dead
             bindings are removed in this single backwards pass. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Promote the exit's destination/jumpkind to the bb end, then
         no-op the exit and everything after it. */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
2351
2352
2353/*---------------------------------------------------------------*/
2354/*--- Specialisation of helper function calls, in             ---*/
2355/*--- collaboration with the front end                        ---*/
2356/*---------------------------------------------------------------*/
2357
/* Offer every 't = CCall(...)' in 'bb' to the front end's
   'specHelper' callback, which may return a cheaper equivalent IR
   expression (or NULL to decline).  Replacements are installed in
   place; if any were made, the block is re-flattened, since the
   replacement expression may be arbitrarily deep. */
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   /* Walk backwards over the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      /* Only 't = CCall(...)' bindings are candidates. */
      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      /* The callee name, args, and the preceding statements (for
         context) are handed to the front end. */
      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
        /* the front end can't think of a suitable replacement */
        continue;

      /* We got something better.  Install it in the bb. */
      any = True;
      bb->stmts[i]
         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);

      if (0) {
         vex_printf("SPEC: ");
         ppIRExpr(st->Ist.WrTmp.data);
         vex_printf("  -->  ");
         ppIRExpr(ex);
         vex_printf("\n");
      }
   }

   /* Replacements may not be flat, so restore the flatness
      invariant before returning. */
   if (any)
      bb = flatten_BB(bb);
   return bb;
}
2401
2402
2403/*---------------------------------------------------------------*/
2404/*--- Determination of guest state aliasing relationships     ---*/
2405/*---------------------------------------------------------------*/
2406
2407/* These are helper functions for CSE and GetI/PutI transformations.
2408
2409   Determine, to the extent possible, the relationship between two
2410   guest state accesses.  The possible outcomes are:
2411
2412   * Exact alias.  These two accesses denote precisely the same
2413     piece of the guest state.
2414
2415   * Definitely no alias.  These two accesses are guaranteed not to
2416     overlap any part of the guest state.
2417
2418   * Unknown -- if neither of the above can be established.
2419
2420   If in doubt, return Unknown.  */
2421
2422typedef
2423   enum { ExactAlias, NoAlias, UnknownAlias }
2424   GSAliasing;
2425
2426
2427/* Produces the alias relation between an indexed guest
2428   state access and a non-indexed access. */
2429
2430static
2431GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2432                                    Int offset2, IRType ty2 )
2433{
2434   UInt minoff1, maxoff1, minoff2, maxoff2;
2435
2436   getArrayBounds( descr1, &minoff1, &maxoff1 );
2437   minoff2 = offset2;
2438   maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2439
2440   if (maxoff1 < minoff2 || maxoff2 < minoff1)
2441      return NoAlias;
2442
2443   /* Could probably do better here if required.  For the moment
2444      however just claim not to know anything more. */
2445   return UnknownAlias;
2446}
2447
2448
2449/* Produces the alias relation between two indexed guest state
2450   accesses. */
2451
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   iters = 0;
   /* Shift both biases non-negative by the same number of nElems
      steps, so the %-reduction below is well defined for C's
      negative-operand semantics.  The iteration cap is just a
      sanity bound. */
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
2514
2515
2516/*---------------------------------------------------------------*/
2517/*--- Common Subexpression Elimination                        ---*/
2518/*---------------------------------------------------------------*/
2519
2520/* Expensive in time and space. */
2521
2522/* Uses two environments:
2523   a IRTemp -> IRTemp mapping
2524   a mapping from AvailExpr* to IRTemp
2525*/
2526
/* An available-expression key for the CSE pass.  Each tag encodes
   the expression's shape in its letters: U = unop, B = binop,
   M = Mux0X, C = const, GetI = indexed Get; lower-case 't' means a
   tmp operand and 'c' a const operand in that position.  Only these
   shapes (over atom operands) are considered for CSE. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;   /* stored by value, not by pointer */
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;   /* stored by value, not by pointer */
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* Mux0X(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e0;
            IRTemp eX;
         } Mttt;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
      } u;
   }
   AvailExpr;
2573
2574static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
2575{
2576   if (a1->tag != a2->tag)
2577      return False;
2578   switch (a1->tag) {
2579      case Ut:
2580         return toBool(
2581                a1->u.Ut.op == a2->u.Ut.op
2582                && a1->u.Ut.arg == a2->u.Ut.arg);
2583      case Btt:
2584         return toBool(
2585                a1->u.Btt.op == a2->u.Btt.op
2586                && a1->u.Btt.arg1 == a2->u.Btt.arg1
2587                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
2588      case Btc:
2589         return toBool(
2590                a1->u.Btc.op == a2->u.Btc.op
2591                && a1->u.Btc.arg1 == a2->u.Btc.arg1
2592                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
2593      case Bct:
2594         return toBool(
2595                a1->u.Bct.op == a2->u.Bct.op
2596                && a1->u.Bct.arg2 == a2->u.Bct.arg2
2597                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
2598      case Cf64i:
2599         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
2600      case Mttt:
2601         return toBool(a1->u.Mttt.co == a2->u.Mttt.co
2602                       && a1->u.Mttt.e0 == a2->u.Mttt.e0
2603                       && a1->u.Mttt.eX == a2->u.Mttt.eX);
2604      case GetIt:
2605         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
2606                       && a1->u.GetIt.ix == a2->u.GetIt.ix
2607                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
2608      default: vpanic("eq_AvailExpr");
2609   }
2610}
2611
2612static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
2613{
2614   IRConst* con;
2615   switch (ae->tag) {
2616      case Ut:
2617         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
2618      case Btt:
2619         return IRExpr_Binop( ae->u.Btt.op,
2620                              IRExpr_RdTmp(ae->u.Btt.arg1),
2621                              IRExpr_RdTmp(ae->u.Btt.arg2) );
2622      case Btc:
2623         con = LibVEX_Alloc(sizeof(IRConst));
2624         *con = ae->u.Btc.con2;
2625         return IRExpr_Binop( ae->u.Btc.op,
2626                              IRExpr_RdTmp(ae->u.Btc.arg1),
2627                              IRExpr_Const(con) );
2628      case Bct:
2629         con = LibVEX_Alloc(sizeof(IRConst));
2630         *con = ae->u.Bct.con1;
2631         return IRExpr_Binop( ae->u.Bct.op,
2632                              IRExpr_Const(con),
2633                              IRExpr_RdTmp(ae->u.Bct.arg2) );
2634      case Cf64i:
2635         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
2636      case Mttt:
2637         return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
2638                             IRExpr_RdTmp(ae->u.Mttt.e0),
2639                             IRExpr_RdTmp(ae->u.Mttt.eX));
2640      case GetIt:
2641         return IRExpr_GetI(ae->u.GetIt.descr,
2642                            IRExpr_RdTmp(ae->u.GetIt.ix),
2643                            ae->u.GetIt.bias);
2644      default:
2645         vpanic("availExpr_to_IRExpr");
2646   }
2647}
2648
2649inline
2650static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
2651{
2652   HWord res;
2653   /* env :: IRTemp -> IRTemp */
2654   if (lookupHHW( env, &res, (HWord)tmp ))
2655      return (IRTemp)res;
2656   else
2657      return tmp;
2658}
2659
2660static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
2661{
2662   /* env :: IRTemp -> IRTemp */
2663   switch (ae->tag) {
2664      case Ut:
2665         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
2666         break;
2667      case Btt:
2668         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
2669         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
2670         break;
2671      case Btc:
2672         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
2673         break;
2674      case Bct:
2675         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
2676         break;
2677      case Cf64i:
2678         break;
2679      case Mttt:
2680         ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
2681         ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
2682         ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
2683         break;
2684      case GetIt:
2685         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
2686         break;
2687      default:
2688         vpanic("subst_AvailExpr");
2689   }
2690}
2691
2692static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
2693{
2694   AvailExpr* ae;
2695
2696   if (e->tag == Iex_Unop
2697       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
2698      ae = LibVEX_Alloc(sizeof(AvailExpr));
2699      ae->tag      = Ut;
2700      ae->u.Ut.op  = e->Iex.Unop.op;
2701      ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
2702      return ae;
2703   }
2704
2705   if (e->tag == Iex_Binop
2706       && e->Iex.Binop.arg1->tag == Iex_RdTmp
2707       && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2708      ae = LibVEX_Alloc(sizeof(AvailExpr));
2709      ae->tag        = Btt;
2710      ae->u.Btt.op   = e->Iex.Binop.op;
2711      ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2712      ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2713      return ae;
2714   }
2715
2716   if (e->tag == Iex_Binop
2717      && e->Iex.Binop.arg1->tag == Iex_RdTmp
2718      && e->Iex.Binop.arg2->tag == Iex_Const) {
2719      ae = LibVEX_Alloc(sizeof(AvailExpr));
2720      ae->tag        = Btc;
2721      ae->u.Btc.op   = e->Iex.Binop.op;
2722      ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2723      ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
2724      return ae;
2725   }
2726
2727   if (e->tag == Iex_Binop
2728      && e->Iex.Binop.arg1->tag == Iex_Const
2729      && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2730      ae = LibVEX_Alloc(sizeof(AvailExpr));
2731      ae->tag        = Bct;
2732      ae->u.Bct.op   = e->Iex.Binop.op;
2733      ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2734      ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
2735      return ae;
2736   }
2737
2738   if (e->tag == Iex_Const
2739       && e->Iex.Const.con->tag == Ico_F64i) {
2740      ae = LibVEX_Alloc(sizeof(AvailExpr));
2741      ae->tag          = Cf64i;
2742      ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
2743      return ae;
2744   }
2745
2746   if (e->tag == Iex_Mux0X
2747       && e->Iex.Mux0X.cond->tag == Iex_RdTmp
2748       && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
2749       && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
2750      ae = LibVEX_Alloc(sizeof(AvailExpr));
2751      ae->tag       = Mttt;
2752      ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
2753      ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
2754      ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
2755      return ae;
2756   }
2757
2758   if (e->tag == Iex_GetI
2759       && e->Iex.GetI.ix->tag == Iex_RdTmp) {
2760      ae = LibVEX_Alloc(sizeof(AvailExpr));
2761      ae->tag           = GetIt;
2762      ae->u.GetIt.descr = e->Iex.GetI.descr;
2763      ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
2764      ae->u.GetIt.bias  = e->Iex.GetI.bias;
2765      return ae;
2766   }
2767
2768   return NULL;
2769}
2770
2771
/* Perform common-subexpression elimination on bb.  The BB is modified
   in-place.  Returns True if any changes were made. */

static Bool do_cse_BB ( IRSB* bb )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* IRTemps are stored directly in the hash tables' HWord slots. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI kind.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         /* Scan all live aenv bindings; only GetI bindings can be
            invalidated by guest-state writes. */
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            if (ae->tag != GetIt)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         st->Ist.PutI.descr,
                         st->Ist.PutI.ix,
                         st->Ist.PutI.bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way (linear
         scan with structural comparison) */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
2923
2924
2925/*---------------------------------------------------------------*/
2926/*--- Add32/Sub32 chain collapsing                            ---*/
2927/*---------------------------------------------------------------*/
2928
2929/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2930
2931/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
2932   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
2933   root node is Add32, not Sub32. */
2934
2935static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2936{
2937   if (e->tag != Iex_Binop)
2938      return False;
2939   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2940      return False;
2941   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
2942      return False;
2943   if (e->Iex.Binop.arg2->tag != Iex_Const)
2944      return False;
2945   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2946   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2947   if (e->Iex.Binop.op == Iop_Sub32)
2948      *i32 = -*i32;
2949   return True;
2950}
2951
2952
2953/* Figure out if tmp can be expressed as tmp2 +32 const, for some
2954   other tmp2.  Scan backwards from the specified start point -- an
2955   optimisation. */
2956
2957static Bool collapseChain ( IRSB* bb, Int startHere,
2958                            IRTemp tmp,
2959                            IRTemp* tmp2, Int* i32 )
2960{
2961   Int     j, ii;
2962   IRTemp  vv;
2963   IRStmt* st;
2964   IRExpr* e;
2965
2966   /* the (var, con) pair contain the current 'representation' for
2967      'tmp'.  We start with 'tmp + 0'.  */
2968   IRTemp var = tmp;
2969   Int    con = 0;
2970
2971   /* Scan backwards to see if tmp can be replaced by some other tmp
2972     +/- a constant. */
2973   for (j = startHere; j >= 0; j--) {
2974      st = bb->stmts[j];
2975      if (st->tag != Ist_WrTmp)
2976         continue;
2977      if (st->Ist.WrTmp.tmp != var)
2978         continue;
2979      e = st->Ist.WrTmp.data;
2980      if (!isAdd32OrSub32(e, &vv, &ii))
2981         break;
2982      var = vv;
2983      con += ii;
2984   }
2985   if (j == -1)
2986      /* no earlier binding for var .. ill-formed IR */
2987      vpanic("collapseChain");
2988
2989   /* so, did we find anything interesting? */
2990   if (var == tmp)
2991      return False; /* no .. */
2992
2993   *tmp2 = var;
2994   *i32  = con;
2995   return True;
2996}
2997
2998
2999/* ------- Main function for Add32/Sub32 chain collapsing ------ */
3000
static void collapse_AddSub_chains_BB ( IRSB* bb )
{
   /* Walk the block backwards, rewriting three statement shapes so
      that chains of +/- constant adjustments collapse into a single
      Add32/Sub32 (or a single folded GetI/PutI bias):
        (1) t1 = Add32/Sub32(t2, con)
        (2) t1 = GetI[t2, con]
        (3) PutI[t2, con] = data
      In each case collapseChain tries to re-express the index temp as
      some earlier temp plus an accumulated constant. */
   IRStmt *st;
   IRTemp var, var2;
   Int    i, con, con2;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */

      if (st->tag == Ist_WrTmp
          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {

         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
            Find out if var can be expressed as var2 + con2. */
         if (collapseChain(bb, i-1, var, &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing1 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += con;
            /* Emit Add32 for a non-negative total, Sub32 otherwise,
               so the constant operand is always non-negative. */
            bb->stmts[i]
               = IRStmt_WrTmp(
                    st->Ist.WrTmp.tmp,
                    (con2 >= 0)
                      ? IRExpr_Binop(Iop_Add32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(con2)))
                      : IRExpr_Binop(Iop_Sub32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(-con2)))
                 );
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }

         continue;
      }

      /* Try to collapse 't1 = GetI[t2, con]'. */

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing3 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         /* Fold the collapsed constant into the GetI bias. */
         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
         bb->stmts[i]
            = IRStmt_WrTmp(
                 st->Ist.WrTmp.tmp,
                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
                             IRExpr_RdTmp(var2),
                             con2));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

      /* Perhaps st is PutI[t, con] ? */

      if (st->tag == Ist_PutI
          && st->Ist.PutI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
                               &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing2 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         /* Fold the collapsed constant into the PutI bias. */
         con2 += st->Ist.PutI.bias;
         bb->stmts[i]
           = IRStmt_PutI(st->Ist.PutI.descr,
                         IRExpr_RdTmp(var2),
                         con2,
                         st->Ist.PutI.data);
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

   } /* for */
}
3098
3099
3100/*---------------------------------------------------------------*/
3101/*--- PutI/GetI transformations                               ---*/
3102/*---------------------------------------------------------------*/
3103
3104/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
3105   the given starting point to find, if any, a PutI which writes
3106   exactly the same piece of guest state, and so return the expression
3107   that the PutI writes.  This is the core of PutI-GetI forwarding. */
3108
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   /* Core of PutI->GetI forwarding: scan backwards from startHere for
      a PutI that writes exactly the guest-state slot named by
      (descrG, ixG, biasG).  Returns the data that PutI wrote, or NULL
      if no safe binding can be established. */
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       st->Ist.PutI.descr,
                       st->Ist.PutI.ix,
                       st->Ist.PutI.bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return st->Ist.PutI.data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
3199
3200
3201
3202/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3203   that it writes exactly the same piece of guest state) ?  Safe
3204   answer: False. */
3205
3206static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3207{
3208   vassert(pi->tag == Ist_PutI);
3209   if (s2->tag != Ist_PutI)
3210      return False;
3211
3212   return toBool(
3213          getAliasingRelation_II(
3214             pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3215             s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3216          )
3217          == ExactAlias
3218          );
3219}
3220
3221
3222/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3223   overlap it?  Safe answer: True.  Note, we could do a lot better
3224   than this if needed. */
3225
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   /* Could the guest-state access made by s2 overlap the slot written
      by the PutI 'pi'?  Conservative answer is True.  Statements that
      touch no guest state (WrTmp of non-Get/GetI, Store, NoOp, IMark)
      answer False. */
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);
   /* NOTE(review): minoffP/maxoffP are computed here but never read
      below -- presumably left over from an earlier overlap-testing
      scheme; confirm before removing. */
   getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI:
         vassert(isIRAtom(s2->Ist.PutI.ix));
         vassert(isIRAtom(s2->Ist.PutI.data));
         relation
            = getAliasingRelation_II(
                 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
                 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
              );
         goto have_relation;

      case Ist_WrTmp:
         /* Only Get/GetI right-hand sides read guest state. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    pi->Ist.PutI.descr, pi->Ist.PutI.ix,
                                        pi->Ist.PutI.bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    pi->Ist.PutI.descr, pi->Ist.PutI.ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Stores hit memory, not guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
3320
3321
3322
3323/* ---------- PutI/GetI transformations main functions --------- */
3324
3325/* Remove redundant GetIs, to the extent that they can be detected.
3326   bb is modified in-place. */
3327
3328static
3329void do_redundant_GetI_elimination ( IRSB* bb )
3330{
3331   Int     i;
3332   IRStmt* st;
3333
3334   for (i = bb->stmts_used-1; i >= 0; i--) {
3335      st = bb->stmts[i];
3336      if (st->tag == Ist_NoOp)
3337         continue;
3338
3339      if (st->tag == Ist_WrTmp
3340          && st->Ist.WrTmp.data->tag == Iex_GetI
3341          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3342         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3343         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
3344         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
3345         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
3346         if (replacement
3347             && isIRAtom(replacement)
3348             /* Make sure we're doing a type-safe transformation! */
3349             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3350            if (DEBUG_IROPT) {
3351               vex_printf("rGI:  ");
3352               ppIRExpr(st->Ist.WrTmp.data);
3353               vex_printf(" -> ");
3354               ppIRExpr(replacement);
3355               vex_printf("\n");
3356            }
3357            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3358         }
3359      }
3360   }
3361
3362}
3363
3364
3365/* Remove redundant PutIs, to the extent which they can be detected.
3366   bb is modified in-place. */
3367
3368static
3369void do_redundant_PutI_elimination ( IRSB* bb )
3370{
3371   Int    i, j;
3372   Bool   delete;
3373   IRStmt *st, *stj;
3374
3375   for (i = 0; i < bb->stmts_used; i++) {
3376      st = bb->stmts[i];
3377      if (st->tag != Ist_PutI)
3378         continue;
3379      /* Ok, search forwards from here to see if we can find another
3380         PutI which makes this one redundant, and dodging various
3381         hazards.  Search forwards:
3382         * If conditional exit, give up (because anything after that
3383           does not postdominate this put).
3384         * If a Get which might overlap, give up (because this PutI
3385           not necessarily dead).
3386         * If a Put which is identical, stop with success.
3387         * If a Put which might overlap, but is not identical, give up.
3388         * If a dirty helper call which might write guest state, give up.
3389         * If a Put which definitely doesn't overlap, or any other
3390           kind of stmt, continue.
3391      */
3392      delete = False;
3393      for (j = i+1; j < bb->stmts_used; j++) {
3394         stj = bb->stmts[j];
3395         if (stj->tag == Ist_NoOp)
3396            continue;
3397         if (identicalPutIs(st, stj)) {
3398            /* success! */
3399            delete = True;
3400            break;
3401         }
3402         if (stj->tag == Ist_Exit)
3403            /* give up */
3404            break;
3405         if (st->tag == Ist_Dirty)
3406            /* give up; could do better here */
3407            break;
3408         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
3409            /* give up */
3410           break;
3411      }
3412
3413      if (delete) {
3414         if (DEBUG_IROPT) {
3415            vex_printf("rPI:  ");
3416            ppIRStmt(st);
3417            vex_printf("\n");
3418         }
3419         bb->stmts[i] = IRStmt_NoOp();
3420      }
3421
3422   }
3423}
3424
3425
3426/*---------------------------------------------------------------*/
3427/*--- Loop unrolling                                          ---*/
3428/*---------------------------------------------------------------*/
3429
3430/* Adjust all tmp values (names) in e by delta.  e is destructively
3431   modified. */
3432
3433static void deltaIRExpr ( IRExpr* e, Int delta )
3434{
3435   Int i;
3436   switch (e->tag) {
3437      case Iex_RdTmp:
3438         e->Iex.RdTmp.tmp += delta;
3439         break;
3440      case Iex_Get:
3441      case Iex_Const:
3442         break;
3443      case Iex_GetI:
3444         deltaIRExpr(e->Iex.GetI.ix, delta);
3445         break;
3446      case Iex_Qop:
3447         deltaIRExpr(e->Iex.Qop.arg1, delta);
3448         deltaIRExpr(e->Iex.Qop.arg2, delta);
3449         deltaIRExpr(e->Iex.Qop.arg3, delta);
3450         deltaIRExpr(e->Iex.Qop.arg4, delta);
3451         break;
3452      case Iex_Triop:
3453         deltaIRExpr(e->Iex.Triop.arg1, delta);
3454         deltaIRExpr(e->Iex.Triop.arg2, delta);
3455         deltaIRExpr(e->Iex.Triop.arg3, delta);
3456         break;
3457      case Iex_Binop:
3458         deltaIRExpr(e->Iex.Binop.arg1, delta);
3459         deltaIRExpr(e->Iex.Binop.arg2, delta);
3460         break;
3461      case Iex_Unop:
3462         deltaIRExpr(e->Iex.Unop.arg, delta);
3463         break;
3464      case Iex_Load:
3465         deltaIRExpr(e->Iex.Load.addr, delta);
3466         break;
3467      case Iex_CCall:
3468         for (i = 0; e->Iex.CCall.args[i]; i++)
3469            deltaIRExpr(e->Iex.CCall.args[i], delta);
3470         break;
3471      case Iex_Mux0X:
3472         deltaIRExpr(e->Iex.Mux0X.cond, delta);
3473         deltaIRExpr(e->Iex.Mux0X.expr0, delta);
3474         deltaIRExpr(e->Iex.Mux0X.exprX, delta);
3475         break;
3476      default:
3477         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3478         vpanic("deltaIRExpr");
3479   }
3480}
3481
3482/* Adjust all tmp values (names) in st by delta.  st is destructively
3483   modified. */
3484
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These mention no temps at all. */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.ix, delta);
         deltaIRExpr(st->Ist.PutI.data, delta);
         break;
      case Ist_WrTmp:
         /* Rename the temp being defined as well as any uses
            inside the rhs. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         /* Only the guard can mention temps; the destination is a
            literal constant (IRConst), so it needs no adjustment. */
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_CAS:
         /* oldHi/oldLo are temps *defined* by the CAS; oldHi and the
            Hi expressions are absent (INVALID/NULL) for a
            single-element CAS. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         /* result is a temp defined by the statement; storedata is
            NULL for a load-linked. */
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++)
            deltaIRExpr(d->args[i], delta);
         /* d->tmp is the (optional) temp receiving the call's
            return value. */
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
3549
3550
3551/* If possible, return a loop-unrolled version of bb0.  The original
3552   is changed.  If not possible, return NULL.  */
3553
3554/* The two schemas considered are:
3555
3556     X: BODY; goto X
3557
3558     which unrolls to (eg)  X: BODY;BODY; goto X
3559
3560   and
3561
3562       X: BODY; if (c) goto X; goto Y
3563   which trivially transforms to
3564       X: BODY; if (!c) goto Y; goto X;
3565   so it falls in the scope of the first case.
3566
3567   X and Y must be literal (guest) addresses.
3568*/
3569
3570static Int calc_unroll_factor( IRSB* bb )
3571{
3572   Int n_stmts, i;
3573
3574   n_stmts = 0;
3575   for (i = 0; i < bb->stmts_used; i++) {
3576      if (bb->stmts[i]->tag != Ist_NoOp)
3577         n_stmts++;
3578   }
3579
3580   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
3581      if (vex_control.iropt_verbosity > 0)
3582         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
3583                    n_stmts, 8* n_stmts);
3584      return 8;
3585   }
3586   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
3587      if (vex_control.iropt_verbosity > 0)
3588         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
3589                    n_stmts, 4* n_stmts);
3590      return 4;
3591   }
3592
3593   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
3594      if (vex_control.iropt_verbosity > 0)
3595         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
3596                    n_stmts, 2* n_stmts);
3597      return 2;
3598   }
3599
3600   if (vex_control.iropt_verbosity > 0)
3601      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
3602
3603   return 1;
3604}
3605
3606
/* Try to loop-unroll bb0, whose guest address is my_addr.  Returns a
   new (flattened) superblock on success, NULL if either idiom is not
   matched or unrolling is judged unprofitable.  NOTE: despite the
   comment above, idiom #2 handling mutates bb0's clone, and the
   returned block supersedes bb0. */
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* Threshold <= 0 means unrolling is disabled entirely. */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last non-empty statement slot. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Doubling the block jmax times gives 2^jmax copies. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      /* Clone bb1, give the clone a fresh (disjoint) set of temp
         names by shifting them past bb1's, then append the clone's
         stmts onto bb1, doubling it. */
      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%llx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
3777
3778
3779/*---------------------------------------------------------------*/
3780/*--- The tree builder                                        ---*/
3781/*---------------------------------------------------------------*/
3782
3783/* This isn't part of IR optimisation.  Really it's a pass done prior
3784   to instruction selection, which improves the code that the
3785   instruction selector can produce. */
3786
3787/* --- The 'tmp' environment is the central data structure here --- */
3788
3789/* The number of outstanding bindings we're prepared to track.
3790   The number of times the env becomes full and we have to dump
3791   the oldest binding (hence reducing code quality) falls very
3792   rapidly as the env size increases.  8 gives reasonable performance
3793   under most circumstances. */
3794#define A_NENV 10
3795
3796/* bindee == NULL   ===  slot is not in use
3797   bindee != NULL   ===  slot is in use
3798*/
typedef
   struct {
      IRTemp  binder;   /* the temp bound by this slot */
      IRExpr* bindee;   /* bound expression; NULL == slot not in use */
      Bool    doesLoad; /* does bindee contain an Iex_Load? */
      Bool    doesGet;  /* does bindee contain an Iex_Get/Iex_GetI? */
   }
   ATmpInfo;
3807
3808__attribute__((unused))
3809static void ppAEnv ( ATmpInfo* env )
3810{
3811   Int i;
3812   for (i = 0; i < A_NENV; i++) {
3813      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
3814      if (env[i].bindee)
3815         ppIRExpr(env[i].bindee);
3816      else
3817         vex_printf("(null)");
3818      vex_printf("\n");
3819   }
3820}
3821
3822/* --- Tree-traversal fns --- */
3823
3824/* Traverse an expr, and detect if any part of it reads memory or does
3825   a Get.  Be careful ... this really controls how much the
3826   tree-builder can reorder the code, so getting it right is critical.
3827*/
3828static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
3829{
3830   Int i;
3831   switch (e->tag) {
3832      case Iex_CCall:
3833         for (i = 0; e->Iex.CCall.args[i]; i++)
3834            setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
3835         return;
3836      case Iex_Mux0X:
3837         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
3838         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
3839         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
3840         return;
3841      case Iex_Qop:
3842         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
3843         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
3844         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
3845         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
3846         return;
3847      case Iex_Triop:
3848         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
3849         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
3850         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
3851         return;
3852      case Iex_Binop:
3853         setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
3854         setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
3855         return;
3856      case Iex_Unop:
3857         setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
3858         return;
3859      case Iex_Load:
3860         *doesLoad = True;
3861         setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
3862         return;
3863      case Iex_Get:
3864         *doesGet = True;
3865         return;
3866      case Iex_GetI:
3867         *doesGet = True;
3868         setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
3869         return;
3870      case Iex_RdTmp:
3871      case Iex_Const:
3872         return;
3873      default:
3874         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3875         vpanic("setHints_Expr");
3876   }
3877}
3878
3879
3880/* Add a binding to the front of the env and slide all the rest
3881   backwards.  It should be the case that the last slot is free. */
3882static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
3883{
3884   Int i;
3885   vassert(env[A_NENV-1].bindee == NULL);
3886   for (i = A_NENV-1; i >= 1; i--)
3887      env[i] = env[i-1];
3888   env[0].binder   = binder;
3889   env[0].bindee   = bindee;
3890   env[0].doesLoad = False; /* filled in later */
3891   env[0].doesGet  = False; /* filled in later */
3892}
3893
3894/* Given uses :: array of UShort, indexed by IRTemp
3895   Add the use-occurrences of temps in this expression
3896   to the env.
3897*/
3898static void aoccCount_Expr ( UShort* uses, IRExpr* e )
3899{
3900   Int i;
3901
3902   switch (e->tag) {
3903
3904      case Iex_RdTmp: /* the only interesting case */
3905         uses[e->Iex.RdTmp.tmp]++;
3906         return;
3907
3908      case Iex_Mux0X:
3909         aoccCount_Expr(uses, e->Iex.Mux0X.cond);
3910         aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
3911         aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
3912         return;
3913
3914      case Iex_Qop:
3915         aoccCount_Expr(uses, e->Iex.Qop.arg1);
3916         aoccCount_Expr(uses, e->Iex.Qop.arg2);
3917         aoccCount_Expr(uses, e->Iex.Qop.arg3);
3918         aoccCount_Expr(uses, e->Iex.Qop.arg4);
3919         return;
3920
3921      case Iex_Triop:
3922         aoccCount_Expr(uses, e->Iex.Triop.arg1);
3923         aoccCount_Expr(uses, e->Iex.Triop.arg2);
3924         aoccCount_Expr(uses, e->Iex.Triop.arg3);
3925         return;
3926
3927      case Iex_Binop:
3928         aoccCount_Expr(uses, e->Iex.Binop.arg1);
3929         aoccCount_Expr(uses, e->Iex.Binop.arg2);
3930         return;
3931
3932      case Iex_Unop:
3933         aoccCount_Expr(uses, e->Iex.Unop.arg);
3934         return;
3935
3936      case Iex_Load:
3937         aoccCount_Expr(uses, e->Iex.Load.addr);
3938         return;
3939
3940      case Iex_CCall:
3941         for (i = 0; e->Iex.CCall.args[i]; i++)
3942            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
3943         return;
3944
3945      case Iex_GetI:
3946         aoccCount_Expr(uses, e->Iex.GetI.ix);
3947         return;
3948
3949      case Iex_Const:
3950      case Iex_Get:
3951         return;
3952
3953      default:
3954         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3955         vpanic("aoccCount_Expr");
3956    }
3957}
3958
3959
3960/* Given uses :: array of UShort, indexed by IRTemp
3961   Add the use-occurrences of temps in this statement
3962   to the env.
3963*/
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         /* Only the rhs is a use; WrTmp.tmp is a definition, not a
            use, so it is deliberately not counted. */
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.ix);
         aoccCount_Expr(uses, st->Ist.PutI.data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_CAS:
         /* oldHi/oldLo are defs, not uses; the Hi fields are NULL
            for a single-element CAS. */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         /* result is a def; storedata is NULL for load-linked. */
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         /* mAddr is only meaningful when the call has memory
            effects. */
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++)
            aoccCount_Expr(uses, d->args[i]);
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No expressions, hence no uses. */
         return;
      case Ist_Exit:
         /* The destination is a constant, so only the guard counts. */
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
4023
4024/* Look up a binding for tmp in the env.  If found, return the bound
4025   expression, and set the env's binding to NULL so it is marked as
4026   used.  If not found, return NULL. */
4027
4028static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
4029{
4030   Int i;
4031   for (i = 0; i < A_NENV; i++) {
4032      if (env[i].binder == tmp && env[i].bindee != NULL) {
4033         IRExpr* bindee = env[i].bindee;
4034         env[i].bindee = NULL;
4035         return bindee;
4036      }
4037   }
4038   return NULL;
4039}
4040
4041/* Traverse e, looking for temps.  For each observed temp, see if env
4042   contains a binding for the temp, and if so return the bound value.
4043   The env has the property that any binding it holds is
4044   'single-shot', so once a binding is used, it is marked as no longer
4045   available, by setting its .bindee field to NULL. */
4046
/* True iff e is an application of the specific unary op 'op'. */
static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Unop && e->Iex.Unop.op == op;
}
/* True iff e is an application of the specific binary op 'op'. */
static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Binop && e->Iex.Binop.op == op;
}
4053
/* Build Binop(op, a1, a2), first applying a few algebraic
   simplifications that become visible only after tree-building has
   substituted bindings back in.  Falls through to a plain
   IRExpr_Binop when no rule matches. */
static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   switch (op) {
   case Iop_Or32:
      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
                                                     a2->Iex.Unop.arg ) );
      break;

   case Iop_CmpNE32:
      /* Since X has type Ity_I1 we can simplify:
         CmpNE32(1Uto32(X),0)) ==> X */
      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
         return a1->Iex.Unop.arg;
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Binop( op, a1, a2 );
}
4078
/* Build Unop(op, aa), first applying algebraic simplifications that
   become visible only once tree-building has substituted bindings
   back in.  Each case comments the rewrite rule it implements.
   Falls through to a plain IRExpr_Unop when no rule matches. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;


   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
4182
/* Rebuild e with env's bindings substituted in.  Returns a new tree
   (shares unmodified leaves with the original).  Note that Binop and
   Unop are routed through fold_IRExpr_* so that simplifications
   exposed by the substitution are applied on the way out. */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         /* Copy the arg vector so the original expression is not
            mutated. */
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         /* If the env holds a binding for this temp, consume it;
            otherwise leave the temp reference as-is. */
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_Mux0X:
         return IRExpr_Mux0X(
                   atbSubst_Expr(env, e->Iex.Mux0X.cond),
                   atbSubst_Expr(env, e->Iex.Mux0X.expr0),
                   atbSubst_Expr(env, e->Iex.Mux0X.exprX)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.op,
                   atbSubst_Expr(env, e->Iex.Qop.arg1),
                   atbSubst_Expr(env, e->Iex.Qop.arg2),
                   atbSubst_Expr(env, e->Iex.Qop.arg3),
                   atbSubst_Expr(env, e->Iex.Qop.arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.op,
                   atbSubst_Expr(env, e->Iex.Triop.arg1),
                   atbSubst_Expr(env, e->Iex.Triop.arg2),
                   atbSubst_Expr(env, e->Iex.Triop.arg3)
                );
      case Iex_Binop:
         /* Fold, don't just rebuild: see fold_IRExpr_Binop. */
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         /* Fold, don't just rebuild: see fold_IRExpr_Unop. */
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         /* Leaves: nothing to substitute. */
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
4255
4256/* Same deal as atbSubst_Expr, except for stmts. */
4257
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         return IRStmt_PutI(
                   st->Ist.PutI.descr,
                   atbSubst_Expr(env, st->Ist.PutI.ix),
                   st->Ist.PutI.bias,
                   atbSubst_Expr(env, st->Ist.PutI.data)
                );

      case Ist_Exit:
         /* Only the guard is an expression; dst is a constant. */
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* The Hi fields are NULL for a single-element CAS and must
            stay NULL in the copy. */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Shallow-copy the dirty descriptor, then substitute into
            the expression fields of the copy. */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = atbSubst_Expr(env, d2->args[i]);
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
4342
4343/* notstatic */ void ado_treebuild_BB ( IRSB* bb )
4344{
4345   Int      i, j, k, m;
4346   Bool     stmtPuts, stmtStores, invalidateMe;
4347   IRStmt*  st;
4348   IRStmt*  st2;
4349   ATmpInfo env[A_NENV];
4350
4351   Int       n_tmps = bb->tyenv->types_used;
4352   UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));
4353
4354   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
4355      temp.  Also count occurrences in the bb->next field. */
4356
4357   for (i = 0; i < n_tmps; i++)
4358      uses[i] = 0;
4359
4360   for (i = 0; i < bb->stmts_used; i++) {
4361      st = bb->stmts[i];
4362      if (st->tag == Ist_NoOp)
4363         continue;
4364      aoccCount_Stmt( uses, st );
4365   }
4366   aoccCount_Expr(uses, bb->next );
4367
4368#  if 0
4369   for (i = 0; i < n_tmps; i++) {
4370      if (uses[i] == 0)
4371        continue;
4372      ppIRTemp( (IRTemp)i );
4373      vex_printf("  used %d\n", (Int)uses[i] );
4374   }
4375#  endif
4376
4377   /* Phase 2.  Scan forwards in bb.  For each statement in turn:
4378
4379         If the env is full, emit the end element.  This guarantees
4380         there is at least one free slot in the following.
4381
4382         On seeing 't = E', occ(t)==1,
4383            let E'=env(E)
4384            delete this stmt
4385            add t -> E' to the front of the env
4386            Examine E' and set the hints for E' appropriately
4387              (doesLoad? doesGet?)
4388
4389         On seeing any other stmt,
4390            let stmt' = env(stmt)
4391            remove from env any 't=E' binds invalidated by stmt
4392                emit the invalidated stmts
4393            emit stmt'
4394            compact any holes in env
4395              by sliding entries towards the front
4396
4397      Finally, apply env to bb->next.
4398   */
4399
4400   for (i = 0; i < A_NENV; i++) {
4401      env[i].bindee = NULL;
4402      env[i].binder = IRTemp_INVALID;
4403   }
4404
4405   /* The stmts in bb are being reordered, and we are guaranteed to
4406      end up with no more than the number we started with.  Use i to
4407      be the cursor of the current stmt examined and j <= i to be that
4408      for the current stmt being written.
4409   */
4410   j = 0;
4411   for (i = 0; i < bb->stmts_used; i++) {
4412
4413      st = bb->stmts[i];
4414      if (st->tag == Ist_NoOp)
4415         continue;
4416
4417      /* Ensure there's at least one space in the env, by emitting
4418         the oldest binding if necessary. */
4419      if (env[A_NENV-1].bindee != NULL) {
4420         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
4421                                      env[A_NENV-1].bindee );
4422         j++;
4423         vassert(j <= i);
4424         env[A_NENV-1].bindee = NULL;
4425      }
4426
4427      /* Consider current stmt. */
4428      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
4429         IRExpr *e, *e2;
4430
4431         /* optional extra: dump dead bindings as we find them.
4432            Removes the need for a prior dead-code removal pass. */
4433         if (uses[st->Ist.WrTmp.tmp] == 0) {
4434	    if (0) vex_printf("DEAD binding\n");
4435            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4436         }
4437         vassert(uses[st->Ist.WrTmp.tmp] == 1);
4438
4439         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
4440            actions. */
4441         e  = st->Ist.WrTmp.data;
4442         e2 = atbSubst_Expr(env, e);
4443         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
4444         setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
4445         /* don't advance j, as we are deleting this stmt and instead
4446            holding it temporarily in the env. */
4447         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4448      }
4449
4450      /* we get here for any other kind of statement. */
4451      /* 'use up' any bindings required by the current statement. */
4452      st2 = atbSubst_Stmt(env, st);
4453
4454      /* Now, before this stmt, dump any bindings in env that it
4455         invalidates.  These need to be dumped in the order in which
4456         they originally entered env -- that means from oldest to
4457         youngest. */
4458
4459      /* stmtPuts/stmtStores characterise what the stmt under
4460         consideration does, or might do (sidely safe @ True). */
4461      stmtPuts
4462         = toBool( st->tag == Ist_Put
4463                   || st->tag == Ist_PutI
4464                   || st->tag == Ist_Dirty );
4465
4466      /* be True if this stmt writes memory or might do (==> we don't
4467         want to reorder other loads or stores relative to it).  Also,
4468         both LL and SC fall under this classification, since we
4469         really ought to be conservative and not reorder any other
4470         memory transactions relative to them. */
4471      stmtStores
4472         = toBool( st->tag == Ist_Store
4473                   || st->tag == Ist_Dirty
4474                   || st->tag == Ist_LLSC );
4475
4476      for (k = A_NENV-1; k >= 0; k--) {
4477         if (env[k].bindee == NULL)
4478            continue;
4479         /* Compare the actions of this stmt with the actions of
4480            binding 'k', to see if they invalidate the binding. */
4481         invalidateMe
4482            = toBool(
4483              /* a store invalidates loaded data */
4484              (env[k].doesLoad && stmtStores)
4485              /* a put invalidates get'd data */
4486              || (env[k].doesGet && stmtPuts)
4487              /* a put invalidates loaded data.  Note, we could do
4488                 much better here in the sense that we only need to
4489                 invalidate trees containing loads if the Put in
4490                 question is marked as requiring precise
4491                 exceptions. */
4492              || (env[k].doesLoad && stmtPuts)
4493              /* probably overly conservative: a memory bus event
4494                 invalidates absolutely everything, so that all
4495                 computation prior to it is forced to complete before
4496                 proceeding with the event (fence,lock,unlock). */
4497              || st->tag == Ist_MBE
4498              /* also be (probably overly) paranoid re AbiHints */
4499              || st->tag == Ist_AbiHint
4500              );
4501         if (invalidateMe) {
4502            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
4503            j++;
4504            vassert(j <= i);
4505            env[k].bindee = NULL;
4506         }
4507      }
4508
4509      /* Slide in-use entries in env up to the front */
4510      m = 0;
4511      for (k = 0; k < A_NENV; k++) {
4512         if (env[k].bindee != NULL) {
4513            env[m] = env[k];
4514            m++;
4515	 }
4516      }
4517      for (m = m; m < A_NENV; m++) {
4518         env[m].bindee = NULL;
4519      }
4520
4521      /* finally, emit the substituted statement */
4522      bb->stmts[j] = st2;
4523      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
4524      j++;
4525
4526      vassert(j <= i+1);
4527   } /* for each stmt in the original bb ... */
4528
4529   /* Finally ... substitute the ->next field as much as possible, and
4530      dump any left-over bindings.  Hmm.  Perhaps there should be no
4531      left over bindings?  Or any left-over bindings are
4532      by definition dead? */
4533   bb->next = atbSubst_Expr(env, bb->next);
4534   bb->stmts_used = j;
4535}
4536
4537
4538/*---------------------------------------------------------------*/
4539/*--- iropt main                                              ---*/
4540/*---------------------------------------------------------------*/
4541
/* Compile-time switch: when True, the optimisation passes below dump
   the IRSB after each transformation. */
static Bool iropt_verbose = False; /* True; */
4543
4544
4545/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
4546   redundant Put removal, constant propagation, dead code removal,
4547   clean helper specialisation, and dead code removal (again).
4548*/
4549
4550
4551static
4552IRSB* cheap_transformations (
4553         IRSB* bb,
4554         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4555         Bool (*preciseMemExnsFn)(Int,Int)
4556      )
4557{
4558   redundant_get_removal_BB ( bb );
4559   if (iropt_verbose) {
4560      vex_printf("\n========= REDUNDANT GET\n\n" );
4561      ppIRSB(bb);
4562   }
4563
4564   redundant_put_removal_BB ( bb, preciseMemExnsFn );
4565   if (iropt_verbose) {
4566      vex_printf("\n========= REDUNDANT PUT\n\n" );
4567      ppIRSB(bb);
4568   }
4569
4570   bb = cprop_BB ( bb );
4571   if (iropt_verbose) {
4572      vex_printf("\n========= CPROPD\n\n" );
4573      ppIRSB(bb);
4574   }
4575
4576   do_deadcode_BB ( bb );
4577   if (iropt_verbose) {
4578      vex_printf("\n========= DEAD\n\n" );
4579      ppIRSB(bb);
4580   }
4581
4582   bb = spec_helpers_BB ( bb, specHelper );
4583   do_deadcode_BB ( bb );
4584   if (iropt_verbose) {
4585      vex_printf("\n========= SPECd \n\n" );
4586      ppIRSB(bb);
4587   }
4588
4589   return bb;
4590}
4591
4592
/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and PutI.
   Pass ordering is significant: CSE and chain collapsing run before
   the GetI/PutI eliminations, and dead code removal sweeps up last.
   Returns bb (modified in place). */

static
IRSB* expensive_transformations( IRSB* bb )
{
   /* Result (whether any CSEs happened) deliberately ignored here. */
   (void)do_cse_BB( bb );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   do_redundant_PutI_elimination( bb );
   do_deadcode_BB( bb );
   return bb;
}
4606
4607
4608/* Scan a flattened BB to look for signs that more expensive
4609   optimisations might be useful:
4610   - find out if there are any GetIs and PutIs
4611   - find out if there are any floating or vector-typed temporaries
4612*/
4613
4614static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
4615                                 /*OUT*/Bool* hasVorFtemps,
4616                                 IRSB* bb )
4617{
4618   Int      i, j;
4619   IRStmt*  st;
4620   IRDirty* d;
4621   IRCAS*   cas;
4622
4623   *hasGetIorPutI = False;
4624   *hasVorFtemps  = False;
4625
4626   for (i = 0; i < bb->stmts_used; i++) {
4627      st = bb->stmts[i];
4628      switch (st->tag) {
4629         case Ist_AbiHint:
4630            vassert(isIRAtom(st->Ist.AbiHint.base));
4631            vassert(isIRAtom(st->Ist.AbiHint.nia));
4632            break;
4633         case Ist_PutI:
4634            *hasGetIorPutI = True;
4635            break;
4636         case Ist_WrTmp:
4637            if (st->Ist.WrTmp.data->tag == Iex_GetI)
4638               *hasGetIorPutI = True;
4639            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
4640               case Ity_I1: case Ity_I8: case Ity_I16:
4641               case Ity_I32: case Ity_I64: case Ity_I128:
4642                  break;
4643               case Ity_F32: case Ity_F64: case Ity_F128: case Ity_V128:
4644                  *hasVorFtemps = True;
4645                  break;
4646               default:
4647                  goto bad;
4648            }
4649            break;
4650         case Ist_Put:
4651            vassert(isIRAtom(st->Ist.Put.data));
4652            break;
4653         case Ist_Store:
4654            vassert(isIRAtom(st->Ist.Store.addr));
4655            vassert(isIRAtom(st->Ist.Store.data));
4656            break;
4657         case Ist_CAS:
4658            cas = st->Ist.CAS.details;
4659            vassert(isIRAtom(cas->addr));
4660            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
4661            vassert(isIRAtom(cas->expdLo));
4662            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
4663            vassert(isIRAtom(cas->dataLo));
4664            break;
4665         case Ist_LLSC:
4666            vassert(isIRAtom(st->Ist.LLSC.addr));
4667            if (st->Ist.LLSC.storedata)
4668               vassert(isIRAtom(st->Ist.LLSC.storedata));
4669            break;
4670         case Ist_Dirty:
4671            d = st->Ist.Dirty.details;
4672            vassert(isIRAtom(d->guard));
4673            for (j = 0; d->args[j]; j++)
4674               vassert(isIRAtom(d->args[j]));
4675            if (d->mFx != Ifx_None)
4676               vassert(isIRAtom(d->mAddr));
4677            break;
4678         case Ist_NoOp:
4679         case Ist_IMark:
4680         case Ist_MBE:
4681            break;
4682         case Ist_Exit:
4683            vassert(isIRAtom(st->Ist.Exit.guard));
4684            break;
4685         default:
4686         bad:
4687            ppIRStmt(st);
4688            vpanic("considerExpensives");
4689      }
4690   }
4691}
4692
4693
4694/* ---------------- The main iropt entry point. ---------------- */
4695
4696/* exported from this file */
4697/* Rules of the game:
4698
4699   - IRExpr/IRStmt trees should be treated as immutable, as they
4700     may get shared.  So never change a field of such a tree node;
4701     instead construct and return a new one if needed.
4702*/
4703
4704
/* Optimise bb0 and return the optimised block (may be a new IRSB).
   Always flattens; beyond that, the amount of work done is governed
   by vex_control.iropt_level and by what considerExpensives finds in
   the block.  guest_addr is used only for loop unrolling; guest_arch
   only selects the extra ARM cleanup below. */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int),
         Addr64 guest_addr,
         VexArch guest_arch
      )
{
   /* Cumulative counters across all calls, reported only when
      DEBUG_IROPT is set. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      redundant_put_removal_BB ( bb, preciseMemExnsFn );
      do_cse_BB( bb );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb );
         bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb );
         /* If CSE found anything, the cheap passes may have new
            opportunities; run them once more. */
         if (cses)
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
      }

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         /* Unrolling succeeded; bb2 is the unrolled block. */
         bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb );
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
4802
4803
4804/*---------------------------------------------------------------*/
4805/*--- end                                            ir_opt.c ---*/
4806/*---------------------------------------------------------------*/
4807