1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*---------------------------------------------------------------*/
4/*--- begin                                          ir_opt.c ---*/
5/*---------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2004-2015 OpenWorks LLP
12      info@open-works.net
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27   02110-1301, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37#include "libvex_basictypes.h"
38#include "libvex_ir.h"
39#include "libvex.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "ir_opt.h"
44
45
46/* Set to 1 for lots of debugging output. */
47#define DEBUG_IROPT 0
48
49/* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
50#define STATS_IROPT 0
51
52
53/* What iropt does, 29 Dec 04.
54
55   It takes an IRSB and produces a new one with the same meaning,
56   defined thus:
57
58   After execution of the new BB, all guest state and guest memory is
59   the same as after execution of the original.  This is true
60   regardless of how the block was exited (at the end vs side exit).
61
62   In addition, parts of the guest state will be identical to that
63   created by execution of the original at the following observation
64   points:
65
66   * In a dirty helper call, any parts of the guest state that the
67     helper states that it reads or modifies will be up to date.
68     Also, guest memory will be up to date.  Parts of the guest state
69     not marked as being read or modified by the helper cannot be
70     assumed to be up-to-date at the point where the helper is called.
71
   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is up to date only as explained above
     (i.e. at SB exits and as specified by dirty helper call).
75     Also, the stack pointer register is up to date at memory
76     exception points (as this is needed for the stack extension
77     logic in m_signals.c).
78
79   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
80     Immediately prior to any load or store, those parts of the guest
81     state marked as requiring precise exceptions will be up to date.
82     Also, guest memory will be up to date.  Parts of the guest state
83     not marked as requiring precise exceptions cannot be assumed to
84     be up-to-date at the point of the load/store.
85
86   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
87     Same as minimal, but all the guest state is up to date at memory
88     exception points.
89
90   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
91     Guest state is up to date at each instruction.
92
93   The relative order of loads and stores (including loads/stores of
94   guest memory done by dirty helpers annotated as such) is not
95   changed.  However, the relative order of loads with no intervening
96   stores/modifies may be changed.
97
98   Transformation order
99   ~~~~~~~~~~~~~~~~~~~~
100
101   There are three levels of optimisation, controlled by
102   vex_control.iropt_level.  Define first:
103
104   "Cheap transformations" are the following sequence:
105      * Redundant-Get removal
106      * Redundant-Put removal
107      * Constant propagation/folding
108      * Dead code removal
109      * Specialisation of clean helper functions
110      * Dead code removal
111
112   "Expensive transformations" are the following sequence:
113      * CSE
114      * Folding of add/sub chains
115      * Redundant-GetI removal
116      * Redundant-PutI removal
117      * Dead code removal
118
119   Then the transformations are as follows, as defined by
120   vex_control.iropt_level:
121
122   Level 0:
123      * Flatten into atomic form.
124
125   Level 1: the following sequence:
126      * Flatten into atomic form.
127      * Cheap transformations.
128
129   Level 2: the following sequence
130      * Flatten into atomic form.
131      * Cheap transformations.
132      * If block contains any floating or vector types, CSE.
133      * If block contains GetI or PutI, Expensive transformations.
134      * Try unrolling loops.  Three possible outcomes:
135        - No effect: do nothing more.
136        - Unrolled a loop, and block does not contain GetI or PutI:
137          Do: * CSE
138              * Dead code removal
139        - Unrolled a loop, and block contains GetI or PutI:
140          Do: * Expensive transformations
141              * Cheap transformations
142*/
143
144/* Implementation notes, 29 Dec 04.
145
146   TODO (important): I think rPutI removal ignores precise exceptions
147   and is therefore in a sense, wrong.  In the sense that PutIs are
148   assumed not to write parts of the guest state that we need to have
149   up-to-date at loads/stores.  So far on x86 guest that has not
150   mattered since indeed only the x87 FP registers and tags are
151   accessed using GetI/PutI, and there is no need so far for them to
152   be up to date at mem exception points.  The rPutI pass should be
153   fixed.
154
155   TODO: improve pessimistic handling of precise exceptions
156     in the tree builder.
157
158   TODO: check interaction of rGetI and dirty helpers.
159
160   F64i constants are treated differently from other constants.
161   They are not regarded as atoms, and instead lifted off and
162   bound to temps.  This allows them to participate in CSE, which
163   is important for getting good performance for x86 guest code.
164
165   CSE up F64 literals (already doing F64is)
166
167   CSE: consider carefully the requirement for precise exns
168        prior to making CSE any more aggressive.  */
169
170
171/*---------------------------------------------------------------*/
172/*--- Finite mappery, of a sort                               ---*/
173/*---------------------------------------------------------------*/
174
/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */
178
typedef
   struct {
      Bool*  inuse;  /* inuse[i]: is slot i a live binding? */
      HWord* key;    /* key[i]: key stored in slot i */
      HWord* val;    /* val[i]: value bound to key[i] */
      Int    size;   /* capacity of the three parallel arrays */
      Int    used;   /* number of slots handed out so far (<= size);
                        dead slots (inuse == False) are compacted away
                        only when the arrays are grown */
   }
   HashHW;
188
189static HashHW* newHHW ( void )
190{
191   HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
192   h->size   = 8;
193   h->used   = 0;
194   h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
195   h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
196   h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
197   return h;
198}
199
200
201/* Look up key in the map. */
202
203static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
204{
205   Int i;
206   /* vex_printf("lookupHHW(%llx)\n", key ); */
207   for (i = 0; i < h->used; i++) {
208      if (h->inuse[i] && h->key[i] == key) {
209         if (val)
210            *val = h->val[i];
211         return True;
212      }
213   }
214   return False;
215}
216
217
218/* Add key->val to the map.  Replaces any existing binding for key. */
219
220static void addToHHW ( HashHW* h, HWord key, HWord val )
221{
222   Int i, j;
223   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
224
225   /* Find and replace existing binding, if any. */
226   for (i = 0; i < h->used; i++) {
227      if (h->inuse[i] && h->key[i] == key) {
228         h->val[i] = val;
229         return;
230      }
231   }
232
233   /* Ensure a space is available. */
234   if (h->used == h->size) {
235      /* Copy into arrays twice the size. */
236      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
237      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
238      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
239      for (i = j = 0; i < h->size; i++) {
240         if (!h->inuse[i]) continue;
241         inuse2[j] = True;
242         key2[j] = h->key[i];
243         val2[j] = h->val[i];
244         j++;
245      }
246      h->used = j;
247      h->size *= 2;
248      h->inuse = inuse2;
249      h->key = key2;
250      h->val = val2;
251   }
252
253   /* Finally, add it. */
254   vassert(h->used < h->size);
255   h->inuse[h->used] = True;
256   h->key[h->used] = key;
257   h->val[h->used] = val;
258   h->used++;
259}
260
261
262/*---------------------------------------------------------------*/
263/*--- Flattening out a BB into atomic SSA form                ---*/
264/*---------------------------------------------------------------*/
265
266/* Non-critical helper, heuristic for reducing the number of tmp-tmp
267   copies made by flattening.  If in doubt return False. */
268
269static Bool isFlat ( IRExpr* e )
270{
271   if (e->tag == Iex_Get)
272      return True;
273   if (e->tag == Iex_Binop)
274      return toBool( isIRAtom(e->Iex.Binop.arg1)
275                     && isIRAtom(e->Iex.Binop.arg2) );
276   if (e->tag == Iex_Load)
277      return isIRAtom(e->Iex.Load.addr);
278   return False;
279}
280
281/* Flatten out 'ex' so it is atomic, returning a new expression with
282   the same value, after having appended extra IRTemp assignments to
283   the end of 'bb'. */
284
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   /* Recursively flatten 'ex': every non-atomic subexpression is
      bound to a fresh temp by a WrTmp appended to 'bb', and the
      returned expression is always an atom (RdTmp, or a non-F64i
      Const). */
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Flatten the index; bind the GetI itself to a new temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* Get has no subexpressions; just bind it to a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         /* Flatten all four args, then bind the Qop to a temp. */
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         /* Flatten the address; bind the Load itself to a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Copy the NULL-terminated arg vector, flatten each arg in
            place, then bind the call itself to a temp. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already an atom; nothing to do. */
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
396
397
398/* Append a completely flattened form of 'st' to the end of 'bb'. */
399
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   /* Rebuild 'st' with all of its constituent expressions flattened
      (via flatten_Expr) and append the result to 'bb'.  Where the
      statement is already flat, it is appended unchanged to avoid
      allocating a copy. */
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are optional (NULL for single CAS); only
            flatten them when present. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Shallow-copy the dirty call so the original is not
            mutated, then flatten mAddr (if a memory effect is
            declared), the guard, and all non-marker args. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No expressions to flatten; pass through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
519
520
521static IRSB* flatten_BB ( IRSB* in )
522{
523   Int   i;
524   IRSB* out;
525   out = emptyIRSB();
526   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
527   for (i = 0; i < in->stmts_used; i++)
528      if (in->stmts[i])
529         flatten_Stmt( out, in->stmts[i] );
530   out->next     = flatten_Expr( out, in->next );
531   out->jumpkind = in->jumpkind;
532   out->offsIP   = in->offsIP;
533   return out;
534}
535
536
537/*---------------------------------------------------------------*/
538/*--- In-place removal of redundant GETs                      ---*/
539/*---------------------------------------------------------------*/
540
541/* Scan forwards, building up an environment binding (min offset, max
542   offset) pairs to values, which will either be temps or constants.
543
544   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
545   env and if it matches, replace the Get with the stored value.  If
546   there is no match, add a (minoff,maxoff) :-> t binding.
547
548   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
549   any binding which fully or partially overlaps with (minoff,maxoff).
550   Then add a new (minoff,maxoff) :-> t or c binding.  */
551
552/* Extract the min/max offsets from a guest state array descriptor. */
553
554inline
555static void getArrayBounds ( IRRegArray* descr,
556                             UInt* minoff, UInt* maxoff )
557{
558   *minoff = descr->base;
559   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
560   vassert((*minoff & ~0xFFFF) == 0);
561   vassert((*maxoff & ~0xFFFF) == 0);
562   vassert(*minoff <= *maxoff);
563}
564
565/* Create keys, of the form ((minoffset << 16) | maxoffset). */
566
567static UInt mk_key_GetPut ( Int offset, IRType ty )
568{
569   /* offset should fit in 16 bits. */
570   UInt minoff = offset;
571   UInt maxoff = minoff + sizeofIRType(ty) - 1;
572   vassert((minoff & ~0xFFFF) == 0);
573   vassert((maxoff & ~0xFFFF) == 0);
574   return (minoff << 16) | maxoff;
575}
576
577static UInt mk_key_GetIPutI ( IRRegArray* descr )
578{
579   UInt minoff, maxoff;
580   getArrayBounds( descr, &minoff, &maxoff );
581   vassert((minoff & ~0xFFFF) == 0);
582   vassert((maxoff & ~0xFFFF) == 0);
583   return (minoff << 16) | maxoff;
584}
585
586/* Supposing h has keys of the form generated by mk_key_GetPut and
587   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
588   .. k_hi).
589*/
590static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
591{
592   Int  j;
593   UInt e_lo, e_hi;
594   vassert(k_lo <= k_hi);
595   /* invalidate any env entries which in any way overlap (k_lo
596      .. k_hi) */
597   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
598
599   for (j = 0; j < h->used; j++) {
600      if (!h->inuse[j])
601         continue;
602      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
603      e_hi = ((UInt)h->key[j]) & 0xFFFF;
604      vassert(e_lo <= e_hi);
605      if (e_hi < k_lo || k_hi < e_lo)
606         continue; /* no overlap possible */
607      else
608         /* overlap; invalidate */
609         h->inuse[j] = False;
610   }
611}
612
613
static void redundant_get_removal_BB ( IRSB* bb )
{
   /* Forward scan over 'bb', maintaining in 'env' a map from packed
      (minoff,maxoff) keys to the expression (temp or atom) last known
      to hold that part of the guest state.  Gets whose range is
      already bound are replaced in place by the bound value. */
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      /* NOTE: 'key' here is the value computed in the Ist_Put branch
         above during this same iteration; the Put's data has been
         flattened to an atom, so it is safe to remember it. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
706
707
708/*---------------------------------------------------------------*/
709/*--- In-place removal of redundant PUTs                      ---*/
710/*---------------------------------------------------------------*/
711
712/* Find any Get uses in st and invalidate any partially or fully
713   overlapping ranges listed in env.  Due to the flattening phase, the
714   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
715
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   /* Used by the backward redundant-Put scan: remove from 'env' (the
      set of guest-state ranges whose next event is a write) any range
      that 'st' may read, either explicitly via Get/GetI or implicitly
      via memory access / dirty calls / other barriers. */
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               /* A load reads memory, not guest state; handled via
                  memRW below. */
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}
867
868
869/* Scan backwards, building up a set of (min offset, max
870   offset) pairs, indicating those parts of the guest state
871   for which the next event is a write.
872
873   On seeing a conditional exit, empty the set.
874
875   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
876   completely within the set, remove the Put.  Otherwise, add
877   (minoff,maxoff) to the set.
878
879   On seeing 'Get (minoff,maxoff)', remove any part of the set
880   overlapping (minoff,maxoff).  The same has to happen for any events
881   which implicitly read parts of the guest state: dirty helper calls
882   and loads/stores.
883*/
884
885static void redundant_put_removal_BB (
886               IRSB* bb,
887               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
888               VexRegisterUpdates pxControl
889            )
890{
891   Int     i, j;
892   Bool    isPut;
893   IRStmt* st;
894   UInt    key = 0; /* keep gcc -O happy */
895
896   vassert(pxControl < VexRegUpdAllregsAtEachInsn);
897
898   HashHW* env = newHHW();
899
900   /* Initialise the running env with the fact that the final exit
901      writes the IP (or, whatever it claims to write.  We don't
902      care.) */
903   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
904   addToHHW(env, (HWord)key, 0);
905
906   /* And now scan backwards through the statements. */
907   for (i = bb->stmts_used-1; i >= 0; i--) {
908      st = bb->stmts[i];
909
910      if (st->tag == Ist_NoOp)
911         continue;
912
913      /* Deal with conditional exits. */
914      if (st->tag == Ist_Exit) {
915         //Bool re_add;
916         /* Need to throw out from the env, any part of it which
917            doesn't overlap with the guest state written by this exit.
918            Since the exit only writes one section, it's simplest to
919            do this: (1) check whether env contains a write that
920            completely overlaps the write done by this exit; (2) empty
921            out env; and (3) if (1) was true, add the write done by
922            this exit.
923
924            To make (1) a bit simpler, merely search for a write that
925            exactly matches the one done by this exit.  That's safe
926            because it will fail as often or more often than a full
927            overlap check, and failure to find an overlapping write in
928            env is the safe case (we just nuke env if that
929            happens). */
930         //vassert(isIRAtom(st->Ist.Exit.guard));
931         /* (1) */
932         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
933         //                    typeOfIRConst(st->Ist.Exit.dst));
934         //re_add = lookupHHW(env, NULL, key);
935         /* (2) */
936         for (j = 0; j < env->used; j++)
937            env->inuse[j] = False;
938         /* (3) */
939         //if (0 && re_add)
940         //   addToHHW(env, (HWord)key, 0);
941         continue;
942      }
943
944      /* Deal with Puts */
945      switch (st->tag) {
946         case Ist_Put:
947            isPut = True;
948            key = mk_key_GetPut( st->Ist.Put.offset,
949                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
950            vassert(isIRAtom(st->Ist.Put.data));
951            break;
952         case Ist_PutI:
953            isPut = True;
954            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
955            vassert(isIRAtom(st->Ist.PutI.details->ix));
956            vassert(isIRAtom(st->Ist.PutI.details->data));
957            break;
958         default:
959            isPut = False;
960      }
961      if (isPut && st->tag != Ist_PutI) {
962         /* See if any single entry in env overlaps this Put.  This is
963            simplistic in that the transformation is valid if, say, two
964            or more entries in the env overlap this Put, but the use of
965            lookupHHW will only find a single entry which exactly
966            overlaps this Put.  This is suboptimal but safe. */
967         if (lookupHHW(env, NULL, (HWord)key)) {
968            /* This Put is redundant because a later one will overwrite
969               it.  So NULL (nop) it out. */
970            if (DEBUG_IROPT) {
971               vex_printf("rPUT: "); ppIRStmt(st);
972               vex_printf("\n");
973            }
974            bb->stmts[i] = IRStmt_NoOp();
975         } else {
976            /* We can't demonstrate that this Put is redundant, so add it
977               to the running collection. */
978            addToHHW(env, (HWord)key, 0);
979         }
980         continue;
981      }
982
983      /* Deal with Gets.  These remove bits of the environment since
984         appearance of a Get means that the next event for that slice
985         of the guest state is no longer a write, but a read.  Also
986         deals with implicit reads of guest state needed to maintain
987         precise exceptions. */
988      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
989   }
990}
991
992
993/*---------------------------------------------------------------*/
994/*--- Constant propagation and folding                        ---*/
995/*---------------------------------------------------------------*/
996
#if STATS_IROPT
/* Counters for evaluating the effectiveness of sameIRExprs; only
   compiled in when STATS_IROPT is set. */
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation.
   Reset by sameIRExprs, incremented by sameIRExprs_aux2. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30
1024
1025
1026/* The env in this section is a map from IRTemp to IRExpr*,
1027   that is, an array indexed by IRTemp. */
1028
1029/* Do both expressions compute the same value? The answer is generally
1030   conservative, i.e. it will report that the expressions do not compute
1031   the same value when in fact they do. The reason is that we do not
   keep track of changes in the guest state and memory. Thus, two
1033   Get's, GetI's or Load's, even when accessing the same location, will be
1034   assumed to compute different values. After all the accesses may happen
1035   at different times and the guest state / memory can have changed in
1036   the meantime.
1037
1038   XXX IMPORTANT XXX the two expressions must have the same IR type.
1039   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
1040
1041/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
1042   wrapper that deals with the common tags-don't-match case, and a
1043   slower out of line general case.  Saves a few insns. */
1044
1045__attribute__((noinline))
1046static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
1047
1048inline
1049static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1050{
1051   if (e1->tag != e2->tag) return False;
1052   return sameIRExprs_aux2(env, e1, e2);
1053}
1054
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Out-of-line general case for sameIRExprs_aux.  Precondition:
      e1->tag == e2->tag (guaranteed by the wrapper).  Conservative:
      a False result does not imply the expressions actually differ. */
   /* Give up once the comparison has touched too many nodes. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Distinct tmps: compare the expressions they are bound to,
            provided both bindings are present in env. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         /* Same operator and pairwise-equal arguments. */
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller contract: both expressions have the same IR type,
            hence the constants must carry the same tag. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         /* Other constant kinds (floats, vectors): conservatively
            report not-same. */
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
1132
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Entry point for expression-equality testing: reset the per-call
      node budget, then compare.  See the comment block above for the
      precondition that e1 and e2 have the same IR type. */
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   /* Fold this invocation's outcome into the global counters. */
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False;  /* reset */
#endif /* STATS_IROPT */

   return same;
}
1155
1156
1157/* Debugging-only hack (not used in production runs): make a guess
1158   whether sameIRExprs might assert due to the two args being of
1159   different types.  If in doubt return False.  Is only used when
1160   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1161   Bad because it duplicates functionality from typeOfIRExpr.  See
1162   comment on the single use point below for rationale. */
1163static
1164Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
1165{
1166   if (e1->tag != e2->tag) return False;
1167   switch (e1->tag) {
1168      case Iex_Const: {
1169         /* The only interesting case */
1170         IRConst *c1 = e1->Iex.Const.con;
1171         IRConst *c2 = e2->Iex.Const.con;
1172         return c1->tag != c2->tag;
1173      }
1174      default:
1175         break;
1176   }
1177   return False;
1178}
1179
1180
1181/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
1182static Bool isZeroU32 ( IRExpr* e )
1183{
1184   return toBool( e->tag == Iex_Const
1185                  && e->Iex.Const.con->tag == Ico_U32
1186                  && e->Iex.Const.con->Ico.U32 == 0);
1187}
1188
1189/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
1190   Currently unused; commented out to avoid compiler warning */
1191#if 0
1192static Bool isZeroU64 ( IRExpr* e )
1193{
1194   return toBool( e->tag == Iex_Const
1195                  && e->Iex.Const.con->tag == Ico_U64
1196                  && e->Iex.Const.con->Ico.U64 == 0);
1197}
1198#endif
1199
1200/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
1201static Bool isZeroV128 ( IRExpr* e )
1202{
1203   return toBool( e->tag == Iex_Const
1204                  && e->Iex.Const.con->tag == Ico_V128
1205                  && e->Iex.Const.con->Ico.V128 == 0x0000);
1206}
1207
1208/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
1209static Bool isZeroV256 ( IRExpr* e )
1210{
1211   return toBool( e->tag == Iex_Const
1212                  && e->Iex.Const.con->tag == Ico_V256
1213                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
1214}
1215
1216/* Is this an integer constant with value 0 ? */
1217static Bool isZeroU ( IRExpr* e )
1218{
1219   if (e->tag != Iex_Const) return False;
1220   switch (e->Iex.Const.con->tag) {
1221      case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
1222      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
1223      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
1224      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
1225      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
1226      default: vpanic("isZeroU");
1227   }
1228}
1229
1230/* Is this an integer constant with value 1---1b ? */
1231static Bool isOnesU ( IRExpr* e )
1232{
1233   if (e->tag != Iex_Const) return False;
1234   switch (e->Iex.Const.con->tag) {
1235      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
1236      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
1237      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
1238                                     == 0xFFFFFFFF);
1239      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
1240                                     == 0xFFFFFFFFFFFFFFFFULL);
1241      default: ppIRExpr(e); vpanic("isOnesU");
1242   }
1243}
1244
1245static Bool notBool ( Bool b )
1246{
1247   if (b == True) return False;
1248   if (b == False) return True;
1249   vpanic("notBool");
1250}
1251
1252/* Make a zero which has the same type as the result of the given
1253   primop. */
1254static IRExpr* mkZeroOfPrimopResultType ( IROp op )
1255{
1256   switch (op) {
1257      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
1258      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
1259      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
1260      case Iop_Sub32:
1261      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
1262      case Iop_And64:
1263      case Iop_Sub64:
1264      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
1265      case Iop_XorV128:
1266      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
1267      case Iop_XorV256:
1268      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
1269      default: vpanic("mkZeroOfPrimopResultType: bad primop");
1270   }
1271}
1272
1273/* Make a value containing all 1-bits, which has the same type as the
1274   result of the given primop. */
1275static IRExpr* mkOnesOfPrimopResultType ( IROp op )
1276{
1277   switch (op) {
1278      case Iop_CmpEQ32:
1279      case Iop_CmpEQ64:
1280         return IRExpr_Const(IRConst_U1(toBool(1)));
1281      case Iop_Or8:
1282         return IRExpr_Const(IRConst_U8(0xFF));
1283      case Iop_Or16:
1284         return IRExpr_Const(IRConst_U16(0xFFFF));
1285      case Iop_Or32:
1286         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
1287      case Iop_CmpEQ8x8:
1288      case Iop_Or64:
1289         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
1290      case Iop_CmpEQ8x16:
1291      case Iop_CmpEQ16x8:
1292      case Iop_CmpEQ32x4:
1293         return IRExpr_Const(IRConst_V128(0xFFFF));
1294      default:
1295         ppIROp(op);
1296         vpanic("mkOnesOfPrimopResultType: bad primop");
1297   }
1298}
1299
1300/* Helpers for folding Clz32/64. */
1301static UInt fold_Clz64 ( ULong value )
1302{
1303   UInt i;
1304   vassert(value != 0ULL); /* no defined semantics for arg==0 */
1305   for (i = 0; i < 64; ++i) {
1306      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
1307   }
1308   vassert(0);
1309   /*NOTREACHED*/
1310   return 0;
1311}
1312
1313static UInt fold_Clz32 ( UInt value )
1314{
1315   UInt i;
1316   vassert(value != 0); /* no defined semantics for arg==0 */
1317   for (i = 0; i < 32; ++i) {
1318      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
1319   }
1320   vassert(0);
1321   /*NOTREACHED*/
1322   return 0;
1323}
1324
1325/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
1326   the corresponding real constant. */
1327//XXX re-check this before use
1328//static ULong de_summarise_V64 ( UChar v64 )
1329//{
1330//   ULong r = 0;
1331//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1332//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1333//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1334//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1335//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1336//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1337//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1338//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1339//   return r;
1340//}
1341
1342/* Helper for arbitrary expression pattern matching in flat IR.  If
1343   'e' is a reference to a tmp, look it up in env -- repeatedly, if
1344   necessary -- until it resolves to a non-tmp.  Note that this can
1345   return NULL if it can't resolve 'e' to a new expression, which will
1346   be the case if 'e' is instead defined by an IRStmt (IRDirty or
1347   LLSC). */
1348static IRExpr* chase ( IRExpr** env, IRExpr* e )
1349{
1350   /* Why is this loop guaranteed to terminate?  Because all tmps must
1351      have definitions before use, hence a tmp cannot be bound
1352      (directly or indirectly) to itself. */
1353   while (e->tag == Iex_RdTmp) {
1354      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
1355      e = env[(Int)e->Iex.RdTmp.tmp];
1356      if (e == NULL) break;
1357   }
1358   return e;
1359}
1360
1361/* Similar to |chase|, but follows at most one level of tmp reference. */
1362static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
1363{
1364   if (e == NULL || e->tag != Iex_RdTmp)
1365      return e;
1366   else
1367      return env[(Int)e->Iex.RdTmp.tmp];
1368}
1369
1370static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
1371{
1372   Int     shift;
1373   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1374
1375   switch (e->tag) {
1376   case Iex_Unop:
1377      /* UNARY ops */
1378      if (e->Iex.Unop.arg->tag == Iex_Const) {
1379         switch (e->Iex.Unop.op) {
1380         case Iop_1Uto8:
1381            e2 = IRExpr_Const(IRConst_U8(toUChar(
1382                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1383                    ? 1 : 0)));
1384            break;
1385         case Iop_1Uto32:
1386            e2 = IRExpr_Const(IRConst_U32(
1387                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1388                    ? 1 : 0));
1389            break;
1390         case Iop_1Uto64:
1391            e2 = IRExpr_Const(IRConst_U64(
1392                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1393                    ? 1 : 0));
1394            break;
1395
1396         case Iop_1Sto8:
1397            e2 = IRExpr_Const(IRConst_U8(toUChar(
1398                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1399                    ? 0xFF : 0)));
1400            break;
1401         case Iop_1Sto16:
1402            e2 = IRExpr_Const(IRConst_U16(toUShort(
1403                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1404                    ? 0xFFFF : 0)));
1405            break;
1406         case Iop_1Sto32:
1407            e2 = IRExpr_Const(IRConst_U32(
1408                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1409                    ? 0xFFFFFFFF : 0));
1410            break;
1411         case Iop_1Sto64:
1412            e2 = IRExpr_Const(IRConst_U64(
1413                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1414                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
1415            break;
1416
1417         case Iop_8Sto32: {
1418            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1419            u32 <<= 24;
1420            u32 = (Int)u32 >> 24;   /* signed shift */
1421            e2 = IRExpr_Const(IRConst_U32(u32));
1422            break;
1423         }
1424         case Iop_16Sto32: {
1425            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1426            u32 <<= 16;
1427            u32 = (Int)u32 >> 16;   /* signed shift */
1428            e2 = IRExpr_Const(IRConst_U32(u32));
1429            break;
1430         }
1431         case Iop_8Uto64:
1432            e2 = IRExpr_Const(IRConst_U64(
1433                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1434            break;
1435         case Iop_16Uto64:
1436            e2 = IRExpr_Const(IRConst_U64(
1437                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1438            break;
1439         case Iop_8Uto32:
1440            e2 = IRExpr_Const(IRConst_U32(
1441                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1442            break;
1443         case Iop_8Sto16: {
1444            UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1445            u16 <<= 8;
1446            u16 = (Short)u16 >> 8;  /* signed shift */
1447            e2 = IRExpr_Const(IRConst_U16(u16));
1448            break;
1449         }
1450         case Iop_8Uto16:
1451            e2 = IRExpr_Const(IRConst_U16(
1452                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1453            break;
1454         case Iop_16Uto32:
1455            e2 = IRExpr_Const(IRConst_U32(
1456                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1457            break;
1458         case Iop_32to16:
1459            e2 = IRExpr_Const(IRConst_U16(toUShort(
1460                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1461            break;
1462         case Iop_32to8:
1463            e2 = IRExpr_Const(IRConst_U8(toUChar(
1464                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1465            break;
1466         case Iop_32to1:
1467            e2 = IRExpr_Const(IRConst_U1(toBool(
1468                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1469                 )));
1470            break;
1471         case Iop_64to1:
1472            e2 = IRExpr_Const(IRConst_U1(toBool(
1473                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1474                 )));
1475            break;
1476
1477         case Iop_NotV128:
1478            e2 = IRExpr_Const(IRConst_V128(
1479                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
1480            break;
1481         case Iop_Not64:
1482            e2 = IRExpr_Const(IRConst_U64(
1483                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1484            break;
1485         case Iop_Not32:
1486            e2 = IRExpr_Const(IRConst_U32(
1487                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1488            break;
1489         case Iop_Not16:
1490            e2 = IRExpr_Const(IRConst_U16(toUShort(
1491                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1492            break;
1493         case Iop_Not8:
1494            e2 = IRExpr_Const(IRConst_U8(toUChar(
1495                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1496            break;
1497
1498         case Iop_Not1:
1499            e2 = IRExpr_Const(IRConst_U1(
1500                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1501            break;
1502
1503         case Iop_64to8: {
1504            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1505            w64 &= 0xFFULL;
1506            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1507            break;
1508         }
1509         case Iop_64to16: {
1510            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1511            w64 &= 0xFFFFULL;
1512            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1513            break;
1514         }
1515         case Iop_64to32: {
1516            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1517            w64 &= 0x00000000FFFFFFFFULL;
1518            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1519            break;
1520         }
1521         case Iop_64HIto32: {
1522            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1523            w64 >>= 32;
1524            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1525            break;
1526         }
1527         case Iop_32Uto64:
1528            e2 = IRExpr_Const(IRConst_U64(
1529                    0xFFFFFFFFULL
1530                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1531            break;
1532         case Iop_16Sto64: {
1533            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1534            u64 <<= 48;
1535            u64 = (Long)u64 >> 48;   /* signed shift */
1536            e2 = IRExpr_Const(IRConst_U64(u64));
1537            break;
1538         }
1539         case Iop_32Sto64: {
1540            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1541            u64 <<= 32;
1542            u64 = (Long)u64 >> 32;   /* signed shift */
1543            e2 = IRExpr_Const(IRConst_U64(u64));
1544            break;
1545         }
1546
1547         case Iop_16to8: {
1548            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1549            w16 &= 0xFF;
1550            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1551            break;
1552         }
1553         case Iop_16HIto8: {
1554            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1555            w16 >>= 8;
1556            w16 &= 0xFF;
1557            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1558            break;
1559         }
1560
1561         case Iop_CmpNEZ8:
1562            e2 = IRExpr_Const(IRConst_U1(toBool(
1563                    0 !=
1564                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1565                 )));
1566            break;
1567         case Iop_CmpNEZ32:
1568            e2 = IRExpr_Const(IRConst_U1(toBool(
1569                    0 !=
1570                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1571                 )));
1572            break;
1573         case Iop_CmpNEZ64:
1574            e2 = IRExpr_Const(IRConst_U1(toBool(
1575                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1576                 )));
1577            break;
1578
1579         case Iop_CmpwNEZ32: {
1580            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1581            if (w32 == 0)
1582               e2 = IRExpr_Const(IRConst_U32( 0 ));
1583            else
1584               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1585            break;
1586         }
1587         case Iop_CmpwNEZ64: {
1588            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1589            if (w64 == 0)
1590               e2 = IRExpr_Const(IRConst_U64( 0 ));
1591            else
1592               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1593            break;
1594         }
1595
1596         case Iop_Left32: {
1597            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1598            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
1599            s32 = (s32 | (-s32));
1600            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1601            break;
1602         }
1603
1604         case Iop_Left64: {
1605            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1606            Long  s64 = (Long)u64;
1607            s64 = (s64 | (-s64));
1608            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1609            break;
1610         }
1611
1612         case Iop_Clz32: {
1613            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1614            if (u32 != 0)
1615               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1616            break;
1617         }
1618         case Iop_Clz64: {
1619            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1620            if (u64 != 0ULL)
1621               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1622            break;
1623         }
1624
1625         /* For these vector ones, can't fold all cases, but at least
1626            do the most obvious one.  Could do better here using
1627            summarise/desummarise of vector constants, but too
1628            difficult to verify; hence just handle the zero cases. */
1629         case Iop_32UtoV128: {
1630            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1631            if (0 == u32) {
1632               e2 = IRExpr_Const(IRConst_V128(0x0000));
1633            } else {
1634               goto unhandled;
1635            }
1636            break;
1637         }
1638         case Iop_V128to64: {
1639            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1640            if (0 == ((v128 >> 0) & 0xFF)) {
1641               e2 = IRExpr_Const(IRConst_U64(0));
1642            } else {
1643               goto unhandled;
1644            }
1645            break;
1646         }
1647         case Iop_V128HIto64: {
1648            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1649            if (0 == ((v128 >> 8) & 0xFF)) {
1650               e2 = IRExpr_Const(IRConst_U64(0));
1651            } else {
1652               goto unhandled;
1653            }
1654            break;
1655         }
1656         case Iop_64UtoV128: {
1657            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1658            if (0 == u64) {
1659               e2 = IRExpr_Const(IRConst_V128(0x0000));
1660            } else {
1661               goto unhandled;
1662            }
1663            break;
1664         }
1665
1666         /* Even stupider (although still correct ..) */
1667         case Iop_V256to64_0: case Iop_V256to64_1:
1668         case Iop_V256to64_2: case Iop_V256to64_3: {
1669            UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1670            if (v256 == 0x00000000) {
1671               e2 = IRExpr_Const(IRConst_U64(0));
1672            } else {
1673               goto unhandled;
1674            }
1675            break;
1676         }
1677
1678         case Iop_ZeroHI64ofV128: {
1679            /* Could do better here -- only need to look at the bottom 64 bits
1680               of the argument, really. */
1681            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1682            if (v128 == 0x0000) {
1683               e2 = IRExpr_Const(IRConst_V128(0x0000));
1684            } else {
1685               goto unhandled;
1686            }
1687            break;
1688         }
1689
1690         default:
1691            goto unhandled;
1692      }
1693      }
1694      break;
1695
1696   case Iex_Binop:
1697      /* BINARY ops */
1698      if (e->Iex.Binop.arg1->tag == Iex_Const
1699          && e->Iex.Binop.arg2->tag == Iex_Const) {
1700         /* cases where both args are consts */
1701         switch (e->Iex.Binop.op) {
1702
1703            /* -- Or -- */
1704            case Iop_Or8:
1705               e2 = IRExpr_Const(IRConst_U8(toUChar(
1706                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1707                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1708               break;
1709            case Iop_Or16:
1710               e2 = IRExpr_Const(IRConst_U16(toUShort(
1711                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1712                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1713               break;
1714            case Iop_Or32:
1715               e2 = IRExpr_Const(IRConst_U32(
1716                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1717                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1718               break;
1719            case Iop_Or64:
1720               e2 = IRExpr_Const(IRConst_U64(
1721                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1722                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1723               break;
1724            case Iop_OrV128:
1725               e2 = IRExpr_Const(IRConst_V128(
1726                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1727                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1728               break;
1729
1730            /* -- Xor -- */
1731            case Iop_Xor8:
1732               e2 = IRExpr_Const(IRConst_U8(toUChar(
1733                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1734                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1735               break;
1736            case Iop_Xor16:
1737               e2 = IRExpr_Const(IRConst_U16(toUShort(
1738                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1739                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1740               break;
1741            case Iop_Xor32:
1742               e2 = IRExpr_Const(IRConst_U32(
1743                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1744                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1745               break;
1746            case Iop_Xor64:
1747               e2 = IRExpr_Const(IRConst_U64(
1748                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1749                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1750               break;
1751            case Iop_XorV128:
1752               e2 = IRExpr_Const(IRConst_V128(
1753                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1754                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1755               break;
1756
1757            /* -- And -- */
1758            case Iop_And8:
1759               e2 = IRExpr_Const(IRConst_U8(toUChar(
1760                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1761                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1762               break;
1763            case Iop_And16:
1764               e2 = IRExpr_Const(IRConst_U16(toUShort(
1765                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1766                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1767               break;
1768            case Iop_And32:
1769               e2 = IRExpr_Const(IRConst_U32(
1770                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1771                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1772               break;
1773            case Iop_And64:
1774               e2 = IRExpr_Const(IRConst_U64(
1775                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1776                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1777               break;
1778            case Iop_AndV128:
1779               e2 = IRExpr_Const(IRConst_V128(
1780                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1781                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1782               break;
1783
1784            /* -- Add -- */
1785            case Iop_Add8:
1786               e2 = IRExpr_Const(IRConst_U8(toUChar(
1787                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1788                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1789               break;
1790            case Iop_Add32:
1791               e2 = IRExpr_Const(IRConst_U32(
1792                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1793                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1794               break;
1795            case Iop_Add64:
1796               e2 = IRExpr_Const(IRConst_U64(
1797                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1798                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1799               break;
1800
1801            /* -- Sub -- */
1802            case Iop_Sub8:
1803               e2 = IRExpr_Const(IRConst_U8(toUChar(
1804                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1805                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1806               break;
1807            case Iop_Sub32:
1808               e2 = IRExpr_Const(IRConst_U32(
1809                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1810                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1811               break;
1812            case Iop_Sub64:
1813               e2 = IRExpr_Const(IRConst_U64(
1814                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1815                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1816               break;
1817
1818            /* -- Max32U -- */
1819            case Iop_Max32U: {
1820               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1821               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1822               UInt res  = u32a > u32b ? u32a : u32b;
1823               e2 = IRExpr_Const(IRConst_U32(res));
1824               break;
1825            }
1826
1827            /* -- Mul -- */
1828            case Iop_Mul32:
1829               e2 = IRExpr_Const(IRConst_U32(
1830                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1831                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1832               break;
1833            case Iop_Mul64:
1834               e2 = IRExpr_Const(IRConst_U64(
1835                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1836                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1837               break;
1838
1839            case Iop_MullS32: {
1840               /* very paranoid */
1841               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1842               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1843               Int   s32a = (Int)u32a;
1844               Int   s32b = (Int)u32b;
1845               Long  s64a = (Long)s32a;
1846               Long  s64b = (Long)s32b;
1847               Long  sres = s64a * s64b;
1848               ULong ures = (ULong)sres;
1849               e2 = IRExpr_Const(IRConst_U64(ures));
1850               break;
1851            }
1852
1853            /* -- Shl -- */
1854            case Iop_Shl32:
1855               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1856               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1857               if (shift >= 0 && shift <= 31)
1858                  e2 = IRExpr_Const(IRConst_U32(
1859                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1860                           << shift)));
1861               break;
1862            case Iop_Shl64:
1863               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1864               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1865               if (shift >= 0 && shift <= 63)
1866                  e2 = IRExpr_Const(IRConst_U64(
1867                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1868                           << shift)));
1869               break;
1870
1871            /* -- Sar -- */
1872            case Iop_Sar32: {
1873               /* paranoid ... */
1874               /*signed*/ Int s32;
1875               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1876               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1877               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1878               if (shift >= 0 && shift <= 31) {
1879                  s32 >>=/*signed*/ shift;
1880                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1881               }
1882               break;
1883            }
1884            case Iop_Sar64: {
1885               /* paranoid ... */
1886               /*signed*/ Long s64;
1887               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1888               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1889               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1890               if (shift >= 0 && shift <= 63) {
1891                  s64 >>=/*signed*/ shift;
1892                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1893               }
1894               break;
1895            }
1896
1897            /* -- Shr -- */
1898            case Iop_Shr32: {
1899               /* paranoid ... */
1900               /*unsigned*/ UInt u32;
1901               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1902               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1903               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1904               if (shift >= 0 && shift <= 31) {
1905                  u32 >>=/*unsigned*/ shift;
1906                  e2 = IRExpr_Const(IRConst_U32(u32));
1907               }
1908               break;
1909            }
1910            case Iop_Shr64: {
1911               /* paranoid ... */
1912               /*unsigned*/ ULong u64;
1913               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1914               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1915               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1916               if (shift >= 0 && shift <= 63) {
1917                  u64 >>=/*unsigned*/ shift;
1918                  e2 = IRExpr_Const(IRConst_U64(u64));
1919               }
1920               break;
1921            }
1922
1923            /* -- CmpEQ -- */
1924            case Iop_CmpEQ32:
1925               e2 = IRExpr_Const(IRConst_U1(toBool(
1926                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1927                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1928               break;
1929            case Iop_CmpEQ64:
1930               e2 = IRExpr_Const(IRConst_U1(toBool(
1931                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1932                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1933               break;
1934
1935            /* -- CmpNE -- */
1936            case Iop_CmpNE8:
1937            case Iop_CasCmpNE8:
1938            case Iop_ExpCmpNE8:
1939               e2 = IRExpr_Const(IRConst_U1(toBool(
1940                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1941                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1942               break;
1943            case Iop_CmpNE32:
1944            case Iop_CasCmpNE32:
1945            case Iop_ExpCmpNE32:
1946               e2 = IRExpr_Const(IRConst_U1(toBool(
1947                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1948                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1949               break;
1950            case Iop_CmpNE64:
1951            case Iop_CasCmpNE64:
1952            case Iop_ExpCmpNE64:
1953               e2 = IRExpr_Const(IRConst_U1(toBool(
1954                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1955                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1956               break;
1957
1958            /* -- CmpLEU -- */
1959            case Iop_CmpLE32U:
1960               e2 = IRExpr_Const(IRConst_U1(toBool(
1961                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1962                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1963               break;
1964            case Iop_CmpLE64U:
1965               e2 = IRExpr_Const(IRConst_U1(toBool(
1966                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1967                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1968               break;
1969
1970            /* -- CmpLES -- */
1971            case Iop_CmpLE32S:
1972               e2 = IRExpr_Const(IRConst_U1(toBool(
1973                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1974                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1975               break;
1976            case Iop_CmpLE64S:
1977               e2 = IRExpr_Const(IRConst_U1(toBool(
1978                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1979                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1980               break;
1981
1982            /* -- CmpLTS -- */
1983            case Iop_CmpLT32S:
1984               e2 = IRExpr_Const(IRConst_U1(toBool(
1985                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1986                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1987               break;
1988            case Iop_CmpLT64S:
1989               e2 = IRExpr_Const(IRConst_U1(toBool(
1990                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1991                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1992               break;
1993
1994            /* -- CmpLTU -- */
1995            case Iop_CmpLT32U:
1996               e2 = IRExpr_Const(IRConst_U1(toBool(
1997                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1998                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1999               break;
2000            case Iop_CmpLT64U:
2001               e2 = IRExpr_Const(IRConst_U1(toBool(
2002                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2003                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2004               break;
2005
2006            /* -- CmpORD -- */
2007            case Iop_CmpORD32S: {
2008               /* very paranoid */
2009               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
2010               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
2011               Int   s32a = (Int)u32a;
2012               Int   s32b = (Int)u32b;
2013               Int   r = 0x2; /* EQ */
2014               if (s32a < s32b) {
2015                  r = 0x8; /* LT */
2016               }
2017               else if (s32a > s32b) {
2018                  r = 0x4; /* GT */
2019               }
2020               e2 = IRExpr_Const(IRConst_U32(r));
2021               break;
2022            }
2023
2024            /* -- nHLto2n -- */
2025            case Iop_32HLto64:
2026               e2 = IRExpr_Const(IRConst_U64(
2027                       (((ULong)(e->Iex.Binop.arg1
2028                                  ->Iex.Const.con->Ico.U32)) << 32)
2029                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
2030                    ));
2031               break;
2032            case Iop_64HLto128:
2033               /* We can't fold this, because there is no way to
2034                  express the result in IR, but at least pretend to
2035                  handle it, so as to stop getting blasted with
2036                  no-rule-for-this-primop messages. */
2037               break;
2038            /* For this vector one, can't fold all cases, but at
2039               least do the most obvious one.  Could do better here
2040               using summarise/desummarise of vector constants, but
2041               too difficult to verify; hence just handle the zero
2042               cases. */
2043            case Iop_64HLtoV128: {
2044               ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2045               ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2046               if (0 == argHi && 0 == argLo) {
2047                  e2 = IRExpr_Const(IRConst_V128(0));
2048               } else {
2049                  goto unhandled;
2050               }
2051               break;
2052            }
2053            /* Same reasoning for the 256-bit version. */
2054            case Iop_V128HLtoV256: {
2055               IRExpr* argHi = e->Iex.Binop.arg1;
2056               IRExpr* argLo = e->Iex.Binop.arg2;
2057               if (isZeroV128(argHi) && isZeroV128(argLo)) {
2058                  e2 = IRExpr_Const(IRConst_V256(0));
2059               } else {
2060                  goto unhandled;
2061               }
2062               break;
2063            }
2064
2065            /* -- V128 stuff -- */
2066            case Iop_InterleaveLO8x16: {
2067               /* This turns up a lot in Memcheck instrumentation of
2068                  Icc generated code.  I don't know why. */
2069               UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2070               UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2071               if (0 == arg1 && 0 == arg2) {
2072                  e2 = IRExpr_Const(IRConst_V128(0));
2073               } else {
2074                  goto unhandled;
2075               }
2076               break;
2077            }
2078
2079            default:
2080               goto unhandled;
2081         }
2082
2083      } else {
2084
2085         /* other cases (identities, etc) */
2086         switch (e->Iex.Binop.op) {
2087
2088            case Iop_Shl32:
2089            case Iop_Shl64:
2090            case Iop_Shr64:
2091            case Iop_Sar64:
2092               /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2093               if (isZeroU(e->Iex.Binop.arg2)) {
2094                  e2 = e->Iex.Binop.arg1;
2095                  break;
2096               }
2097               /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
2098               if (isZeroU(e->Iex.Binop.arg1)) {
2099                  e2 = e->Iex.Binop.arg1;
2100                  break;
2101               }
2102               break;
2103
2104            case Iop_Sar32:
2105            case Iop_Shr32:
2106               /* Shr32/Sar32(x,0) ==> x */
2107               if (isZeroU(e->Iex.Binop.arg2)) {
2108                  e2 = e->Iex.Binop.arg1;
2109                  break;
2110               }
2111               break;
2112
2113            case Iop_Or8:
2114            case Iop_Or16:
2115            case Iop_Or32:
2116            case Iop_Or64:
2117            case Iop_Max32U:
2118               /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2119               if (isZeroU(e->Iex.Binop.arg2)) {
2120                  e2 = e->Iex.Binop.arg1;
2121                  break;
2122               }
2123               /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2124               if (isZeroU(e->Iex.Binop.arg1)) {
2125                  e2 = e->Iex.Binop.arg2;
2126                  break;
2127               }
2128               /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2129               /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2130               if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2131                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2132                  break;
2133               }
2134               /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2135               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2136                  e2 = e->Iex.Binop.arg1;
2137                  break;
2138               }
2139               break;
2140
2141            case Iop_Add8:
2142               /* Add8(t,t) ==> t << 1.
2143                  Memcheck doesn't understand that
2144                  x+x produces a defined least significant bit, and it seems
2145                  simplest just to get rid of the problem by rewriting it
2146                  out, since the opportunity to do so exists. */
2147               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2148                  e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2149                                    IRExpr_Const(IRConst_U8(1)));
2150                  break;
2151               }
2152               break;
2153
2154               /* NB no Add16(t,t) case yet as no known test case exists */
2155
2156            case Iop_Add32:
2157            case Iop_Add64:
2158               /* Add32/Add64(x,0) ==> x */
2159               if (isZeroU(e->Iex.Binop.arg2)) {
2160                  e2 = e->Iex.Binop.arg1;
2161                  break;
2162               }
2163               /* Add32/Add64(0,x) ==> x */
2164               if (isZeroU(e->Iex.Binop.arg1)) {
2165                  e2 = e->Iex.Binop.arg2;
2166                  break;
2167               }
2168               /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2169               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2170                  e2 = IRExpr_Binop(
2171                          e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2172                          e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2173                  break;
2174               }
2175               break;
2176
2177            case Iop_Sub32:
2178            case Iop_Sub64:
2179               /* Sub32/Sub64(x,0) ==> x */
2180               if (isZeroU(e->Iex.Binop.arg2)) {
2181                  e2 = e->Iex.Binop.arg1;
2182                  break;
2183               }
2184               /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2185               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2186                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2187                  break;
2188               }
2189               break;
2190            case Iop_Sub8x16:
2191               /* Sub8x16(x,0) ==> x */
2192               if (isZeroV128(e->Iex.Binop.arg2)) {
2193                  e2 = e->Iex.Binop.arg1;
2194                  break;
2195               }
2196               break;
2197
2198            case Iop_And8:
2199            case Iop_And16:
2200            case Iop_And32:
2201            case Iop_And64:
2202               /* And8/And16/And32/And64(x,1---1b) ==> x */
2203               if (isOnesU(e->Iex.Binop.arg2)) {
2204                  e2 = e->Iex.Binop.arg1;
2205                  break;
2206               }
2207               /* And8/And16/And32/And64(1---1b,x) ==> x */
2208               if (isOnesU(e->Iex.Binop.arg1)) {
2209                  e2 = e->Iex.Binop.arg2;
2210                  break;
2211               }
2212               /* And8/And16/And32/And64(x,0) ==> 0 */
2213               if (isZeroU(e->Iex.Binop.arg2)) {
2214                  e2 = e->Iex.Binop.arg2;
2215                  break;
2216               }
2217               /* And8/And16/And32/And64(0,x) ==> 0 */
2218               if (isZeroU(e->Iex.Binop.arg1)) {
2219                  e2 = e->Iex.Binop.arg1;
2220                  break;
2221               }
2222               /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2223               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2224                  e2 = e->Iex.Binop.arg1;
2225                  break;
2226               }
2227               break;
2228
2229            case Iop_AndV128:
2230            case Iop_AndV256:
2231               /* AndV128/AndV256(t,t)
2232                  ==> t, for some IRTemp t */
2233               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2234                  e2 = e->Iex.Binop.arg1;
2235                  break;
2236               }
2237               /* Deal with either arg zero.  Could handle other And
2238                  cases here too. */
2239               if (e->Iex.Binop.op == Iop_AndV256
2240                   && (isZeroV256(e->Iex.Binop.arg1)
2241                       || isZeroV256(e->Iex.Binop.arg2))) {
2242                  e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2243                  break;
2244               } else if (e->Iex.Binop.op == Iop_AndV128
2245                          && (isZeroV128(e->Iex.Binop.arg1)
2246                              || isZeroV128(e->Iex.Binop.arg2))) {
2247                  e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2248                  break;
2249               }
2250               break;
2251
2252            case Iop_OrV128:
2253            case Iop_OrV256:
2254               /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2255               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2256                  e2 = e->Iex.Binop.arg1;
2257                  break;
2258               }
2259               /* OrV128(t,0) ==> t */
2260               if (e->Iex.Binop.op == Iop_OrV128) {
2261                  if (isZeroV128(e->Iex.Binop.arg2)) {
2262                     e2 = e->Iex.Binop.arg1;
2263                     break;
2264                  }
2265                  if (isZeroV128(e->Iex.Binop.arg1)) {
2266                     e2 = e->Iex.Binop.arg2;
2267                     break;
2268                  }
2269               }
2270               /* OrV256(t,0) ==> t */
2271               if (e->Iex.Binop.op == Iop_OrV256) {
2272                  if (isZeroV256(e->Iex.Binop.arg2)) {
2273                     e2 = e->Iex.Binop.arg1;
2274                     break;
2275                  }
2276                  //Disabled because there's no known test case right now.
2277                  //if (isZeroV256(e->Iex.Binop.arg1)) {
2278                  //   e2 = e->Iex.Binop.arg2;
2279                  //   break;
2280                  //}
2281               }
2282               break;
2283
2284            case Iop_Xor8:
2285            case Iop_Xor16:
2286            case Iop_Xor32:
2287            case Iop_Xor64:
2288            case Iop_XorV128:
2289            case Iop_XorV256:
2290               /* Xor8/16/32/64/V128/V256(t,t) ==> 0, for some IRTemp t */
2291               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2292                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2293                  break;
2294               }
2295               /* XorV128(t,0) ==> t */
2296               if (e->Iex.Binop.op == Iop_XorV128) {
2297                  if (isZeroV128(e->Iex.Binop.arg2)) {
2298                     e2 = e->Iex.Binop.arg1;
2299                     break;
2300                  }
2301                  //Disabled because there's no known test case right now.
2302                  //if (isZeroV128(e->Iex.Binop.arg1)) {
2303                  //   e2 = e->Iex.Binop.arg2;
2304                  //   break;
2305                  //}
2306               } else {
2307                  /* Xor8/16/32/64(0,t) ==> t */
2308                  if (isZeroU(e->Iex.Binop.arg1)) {
2309                     e2 = e->Iex.Binop.arg2;
2310                     break;
2311                  }
2312                  /* Xor8/16/32/64(t,0) ==> t */
2313                  if (isZeroU(e->Iex.Binop.arg2)) {
2314                     e2 = e->Iex.Binop.arg1;
2315                     break;
2316                  }
2317               }
2318               break;
2319
2320            case Iop_CmpNE32:
2321               /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2322               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2323                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2324                  break;
2325               }
2326               /* CmpNE32(1Uto32(b), 0) ==> b */
2327               if (isZeroU32(e->Iex.Binop.arg2)) {
2328                  IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2329                  if (a1 && a1->tag == Iex_Unop
2330                         && a1->Iex.Unop.op == Iop_1Uto32) {
2331                     e2 = a1->Iex.Unop.arg;
2332                     break;
2333                  }
2334               }
2335               break;
2336
2337            case Iop_CmpEQ32:
2338            case Iop_CmpEQ64:
2339            case Iop_CmpEQ8x8:
2340            case Iop_CmpEQ8x16:
2341            case Iop_CmpEQ16x8:
2342            case Iop_CmpEQ32x4:
2343               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2344                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2345                  break;
2346               }
2347               break;
2348
2349            default:
2350               break;
2351         }
2352      }
2353      break;
2354
2355   case Iex_ITE:
2356      /* ITE */
2357      /* is the discriminant a constant? */
2358      if (e->Iex.ITE.cond->tag == Iex_Const) {
2359         /* assured us by the IR type rules */
2360         vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2361         e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2362                 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2363      }
2364      else
2365      /* are the arms identical? (pretty weedy test) */
2366      if (sameIRExprs(env, e->Iex.ITE.iftrue,
2367                           e->Iex.ITE.iffalse)) {
2368         e2 = e->Iex.ITE.iffalse;
2369      }
2370      break;
2371
2372   default:
2373      /* not considered */
2374      break;
2375   }
2376
2377   /* Show cases where we've found but not folded 'op(t,t)'.  Be
2378      careful not to call sameIRExprs with values of different types,
2379      though, else it will assert (and so it should!).  We can't
2380      conveniently call typeOfIRExpr on the two args without a whole
2381      bunch of extra plumbing to pass in a type env, so just use a
2382      hacky test to check the arguments are not anything that might
2383      sameIRExprs to assert.  This is only OK because this kludge is
2384      only used for debug printing, not for "real" operation.  For
2385      "real" operation (ie, all other calls to sameIRExprs), it is
2386      essential that the two args have the same type.
2387
2388      The "right" solution is to plumb the containing block's
2389      IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
2390      that's a bunch of extra parameter passing which will just slow
2391      down the normal case, for no purpose. */
2392   if (vex_control.iropt_verbosity > 0
2393       && e == e2
2394       && e->tag == Iex_Binop
2395       && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2396                                                    e->Iex.Binop.arg2)
2397       && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2398      vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2399      ppIRExpr(e);
2400      vex_printf("\n");
2401   }
2402
2403   /* Show the overall results of folding. */
2404   if (DEBUG_IROPT && e2 != e) {
2405      vex_printf("FOLD: ");
2406      ppIRExpr(e); vex_printf("  ->  ");
2407      ppIRExpr(e2); vex_printf("\n");
2408   }
2409
2410   return e2;
2411
2412 unhandled:
2413#  if 0
2414   vex_printf("\n\n");
2415   ppIRExpr(e);
2416   vpanic("fold_Expr: no rule for the above");
2417#  else
2418   if (vex_control.iropt_verbosity > 0) {
2419      vex_printf("vex iropt: fold_Expr: no const rule for: ");
2420      ppIRExpr(e);
2421      vex_printf("\n");
2422   }
2423   return e2;
2424#  endif
2425}
2426
2427
2428/* Apply the subst to a simple 1-level expression -- guaranteed to be
2429   1-level due to previous flattening pass. */
2430
2431static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2432{
2433   switch (ex->tag) {
2434      case Iex_RdTmp:
2435         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
2436            IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
2437            if (rhs->tag == Iex_RdTmp)
2438               return rhs;
2439            if (rhs->tag == Iex_Const
2440                && rhs->Iex.Const.con->tag != Ico_F64i)
2441               return rhs;
2442         }
2443         /* not bound in env */
2444         return ex;
2445
2446      case Iex_Const:
2447      case Iex_Get:
2448         return ex;
2449
2450      case Iex_GetI:
2451         vassert(isIRAtom(ex->Iex.GetI.ix));
2452         return IRExpr_GetI(
2453            ex->Iex.GetI.descr,
2454            subst_Expr(env, ex->Iex.GetI.ix),
2455            ex->Iex.GetI.bias
2456         );
2457
2458      case Iex_Qop: {
2459         IRQop* qop = ex->Iex.Qop.details;
2460         vassert(isIRAtom(qop->arg1));
2461         vassert(isIRAtom(qop->arg2));
2462         vassert(isIRAtom(qop->arg3));
2463         vassert(isIRAtom(qop->arg4));
2464         return IRExpr_Qop(
2465                   qop->op,
2466                   subst_Expr(env, qop->arg1),
2467                   subst_Expr(env, qop->arg2),
2468                   subst_Expr(env, qop->arg3),
2469                   subst_Expr(env, qop->arg4)
2470                );
2471      }
2472
2473      case Iex_Triop: {
2474         IRTriop* triop = ex->Iex.Triop.details;
2475         vassert(isIRAtom(triop->arg1));
2476         vassert(isIRAtom(triop->arg2));
2477         vassert(isIRAtom(triop->arg3));
2478         return IRExpr_Triop(
2479                   triop->op,
2480                   subst_Expr(env, triop->arg1),
2481                   subst_Expr(env, triop->arg2),
2482                   subst_Expr(env, triop->arg3)
2483                );
2484      }
2485
2486      case Iex_Binop:
2487         vassert(isIRAtom(ex->Iex.Binop.arg1));
2488         vassert(isIRAtom(ex->Iex.Binop.arg2));
2489         return IRExpr_Binop(
2490                   ex->Iex.Binop.op,
2491                   subst_Expr(env, ex->Iex.Binop.arg1),
2492                   subst_Expr(env, ex->Iex.Binop.arg2)
2493                );
2494
2495      case Iex_Unop:
2496         vassert(isIRAtom(ex->Iex.Unop.arg));
2497         return IRExpr_Unop(
2498                   ex->Iex.Unop.op,
2499                   subst_Expr(env, ex->Iex.Unop.arg)
2500                );
2501
2502      case Iex_Load:
2503         vassert(isIRAtom(ex->Iex.Load.addr));
2504         return IRExpr_Load(
2505                   ex->Iex.Load.end,
2506                   ex->Iex.Load.ty,
2507                   subst_Expr(env, ex->Iex.Load.addr)
2508                );
2509
2510      case Iex_CCall: {
2511         Int      i;
2512         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
2513         for (i = 0; args2[i]; i++) {
2514            vassert(isIRAtom(args2[i]));
2515            args2[i] = subst_Expr(env, args2[i]);
2516         }
2517         return IRExpr_CCall(
2518                   ex->Iex.CCall.cee,
2519                   ex->Iex.CCall.retty,
2520                   args2
2521                );
2522      }
2523
2524      case Iex_ITE:
2525         vassert(isIRAtom(ex->Iex.ITE.cond));
2526         vassert(isIRAtom(ex->Iex.ITE.iftrue));
2527         vassert(isIRAtom(ex->Iex.ITE.iffalse));
2528         return IRExpr_ITE(
2529                   subst_Expr(env, ex->Iex.ITE.cond),
2530                   subst_Expr(env, ex->Iex.ITE.iftrue),
2531                   subst_Expr(env, ex->Iex.ITE.iffalse)
2532                );
2533
2534      default:
2535         vex_printf("\n\n"); ppIRExpr(ex);
2536         vpanic("subst_Expr");
2537
2538   }
2539}
2540
2541
/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.

   Returns a newly constructed statement; 'st' itself is not
   modified.  Guarded stores/loads and side exits whose guards fold
   to constants get special treatment -- see the individual cases.
*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         /* Both address fields are atoms; substitute and fold each. */
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* Guard is 0:I1 -- the store never happens. */
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* Guard is 1:I1 -- downgrade to an unconditional store. */
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         /* Substitute/fold all expression fields; the Hi halves are
            optional (NULL for single-wide CAS). */
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         /* Substitute/fold the address and, for SC, the stored data. */
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         /* Shallow-copy the dirty call, then substitute/fold its
            memory address (if any), guard, and argument vector. */
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/BBPTR markers are not real expressions; skip. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
2755
2756
/* Constant propagation and folding over the flattened block 'in',
   producing a new block.  A forward pass builds up 'env', a
   tmp -> IRExpr substitution, applies it to each statement and folds
   the results.  IRStmt_LoadGs whose guards fold to 1:I1 are noted
   during the pass and rewritten afterwards into plain load/convert
   pairs (this needs a fresh temp, hence the separate fixup phase). */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward with us.  This holds a
      substitution, mapping IRTemps to IRExprs.  The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment. This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* SSA form: each tmp may be assigned at most once. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  /* If the table is full, the LoadG is simply left
                     conditional -- correct, just less optimal. */
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

      default:
         break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   /* The block-end expression is substituted but not folded. */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      /* Identity conversions need no op; widening ones do. */
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
2918
2919
2920/*---------------------------------------------------------------*/
2921/*--- Dead code (t = E) removal                               ---*/
2922/*---------------------------------------------------------------*/
2923
2924/* As a side effect, also removes all code following an unconditional
2925   side exit. */
2926
/* The type of the HashHW map is: a map from IRTemp to nothing
   -- really just representing a set of IRTemps.
*/
2930
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   /* Mark 'tmp' as used. */
   set[(Int)tmp] = True;
}
2936
2937static void addUses_Expr ( Bool* set, IRExpr* e )
2938{
2939   Int i;
2940   switch (e->tag) {
2941      case Iex_GetI:
2942         addUses_Expr(set, e->Iex.GetI.ix);
2943         return;
2944      case Iex_ITE:
2945         addUses_Expr(set, e->Iex.ITE.cond);
2946         addUses_Expr(set, e->Iex.ITE.iftrue);
2947         addUses_Expr(set, e->Iex.ITE.iffalse);
2948         return;
2949      case Iex_CCall:
2950         for (i = 0; e->Iex.CCall.args[i]; i++)
2951            addUses_Expr(set, e->Iex.CCall.args[i]);
2952         return;
2953      case Iex_Load:
2954         addUses_Expr(set, e->Iex.Load.addr);
2955         return;
2956      case Iex_Qop:
2957         addUses_Expr(set, e->Iex.Qop.details->arg1);
2958         addUses_Expr(set, e->Iex.Qop.details->arg2);
2959         addUses_Expr(set, e->Iex.Qop.details->arg3);
2960         addUses_Expr(set, e->Iex.Qop.details->arg4);
2961         return;
2962      case Iex_Triop:
2963         addUses_Expr(set, e->Iex.Triop.details->arg1);
2964         addUses_Expr(set, e->Iex.Triop.details->arg2);
2965         addUses_Expr(set, e->Iex.Triop.details->arg3);
2966         return;
2967      case Iex_Binop:
2968         addUses_Expr(set, e->Iex.Binop.arg1);
2969         addUses_Expr(set, e->Iex.Binop.arg2);
2970         return;
2971      case Iex_Unop:
2972         addUses_Expr(set, e->Iex.Unop.arg);
2973         return;
2974      case Iex_RdTmp:
2975         addUses_Temp(set, e->Iex.RdTmp.tmp);
2976         return;
2977      case Iex_Const:
2978      case Iex_Get:
2979         return;
2980      default:
2981         vex_printf("\n");
2982         ppIRExpr(e);
2983         vpanic("addUses_Expr");
2984   }
2985}
2986
2987static void addUses_Stmt ( Bool* set, IRStmt* st )
2988{
2989   Int      i;
2990   IRDirty* d;
2991   IRCAS*   cas;
2992   switch (st->tag) {
2993      case Ist_AbiHint:
2994         addUses_Expr(set, st->Ist.AbiHint.base);
2995         addUses_Expr(set, st->Ist.AbiHint.nia);
2996         return;
2997      case Ist_PutI:
2998         addUses_Expr(set, st->Ist.PutI.details->ix);
2999         addUses_Expr(set, st->Ist.PutI.details->data);
3000         return;
3001      case Ist_WrTmp:
3002         addUses_Expr(set, st->Ist.WrTmp.data);
3003         return;
3004      case Ist_Put:
3005         addUses_Expr(set, st->Ist.Put.data);
3006         return;
3007      case Ist_Store:
3008         addUses_Expr(set, st->Ist.Store.addr);
3009         addUses_Expr(set, st->Ist.Store.data);
3010         return;
3011      case Ist_StoreG: {
3012         IRStoreG* sg = st->Ist.StoreG.details;
3013         addUses_Expr(set, sg->addr);
3014         addUses_Expr(set, sg->data);
3015         addUses_Expr(set, sg->guard);
3016         return;
3017      }
3018      case Ist_LoadG: {
3019         IRLoadG* lg = st->Ist.LoadG.details;
3020         addUses_Expr(set, lg->addr);
3021         addUses_Expr(set, lg->alt);
3022         addUses_Expr(set, lg->guard);
3023         return;
3024      }
3025      case Ist_CAS:
3026         cas = st->Ist.CAS.details;
3027         addUses_Expr(set, cas->addr);
3028         if (cas->expdHi)
3029            addUses_Expr(set, cas->expdHi);
3030         addUses_Expr(set, cas->expdLo);
3031         if (cas->dataHi)
3032            addUses_Expr(set, cas->dataHi);
3033         addUses_Expr(set, cas->dataLo);
3034         return;
3035      case Ist_LLSC:
3036         addUses_Expr(set, st->Ist.LLSC.addr);
3037         if (st->Ist.LLSC.storedata)
3038            addUses_Expr(set, st->Ist.LLSC.storedata);
3039         return;
3040      case Ist_Dirty:
3041         d = st->Ist.Dirty.details;
3042         if (d->mFx != Ifx_None)
3043            addUses_Expr(set, d->mAddr);
3044         addUses_Expr(set, d->guard);
3045         for (i = 0; d->args[i] != NULL; i++) {
3046            IRExpr* arg = d->args[i];
3047            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
3048               addUses_Expr(set, arg);
3049         }
3050         return;
3051      case Ist_NoOp:
3052      case Ist_IMark:
3053      case Ist_MBE:
3054         return;
3055      case Ist_Exit:
3056         addUses_Expr(set, st->Ist.Exit.guard);
3057         return;
3058      default:
3059         vex_printf("\n");
3060         ppIRStmt(st);
3061         vpanic("addUses_Stmt");
3062   }
3063}
3064
3065
3066/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
3067static Bool isZeroU1 ( IRExpr* e )
3068{
3069   return toBool( e->tag == Iex_Const
3070                  && e->Iex.Const.con->tag == Ico_U1
3071                  && e->Iex.Const.con->Ico.U1 == False );
3072}
3073
3074/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
3075static Bool isOneU1 ( IRExpr* e )
3076{
3077   return toBool( e->tag == Iex_Const
3078                  && e->Iex.Const.con->tag == Ico_U1
3079                  && e->Iex.Const.con->Ico.U1 == True );
3080}
3081
3082
3083/* Note, this destructively modifies the given IRSB. */
3084
3085/* Scan backwards through statements, carrying a set of IRTemps which
3086   are known to be used after the current point.  On encountering 't =
3087   E', delete the binding if it is not used.  Otherwise, add any temp
3088   uses to the set and keep on moving backwards.
3089
3090   As an enhancement, the first (backwards) pass searches for IR exits
3091   with always-taken conditions and notes the location of the earliest
3092   one in the block.  If any such are found, a second pass copies the
3093   exit destination and jump kind to the bb-end.  Then, the exit and
3094   all statements following it are turned into no-ops.
3095*/
3096
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   /* set[t] == True iff tmp t is used after the point the backwards
      scan has currently reached. */
   Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits; only the last (lowest
         index) one seen will survive in i_unconditional_exit. */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Copy the exit's target, jump kind and IP offset to the
         block end ... */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      /* ... then no-op the exit itself and everything after it. */
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
3164
3165
3166/*---------------------------------------------------------------*/
3167/*--- Specialisation of helper function calls, in             ---*/
3168/*--- collaboration with the front end                        ---*/
3169/*---------------------------------------------------------------*/
3170
3171static
3172IRSB* spec_helpers_BB(
3173         IRSB* bb,
3174         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
3175      )
3176{
3177   Int     i;
3178   IRStmt* st;
3179   IRExpr* ex;
3180   Bool    any = False;
3181
3182   for (i = bb->stmts_used-1; i >= 0; i--) {
3183      st = bb->stmts[i];
3184
3185      if (st->tag != Ist_WrTmp
3186          || st->Ist.WrTmp.data->tag != Iex_CCall)
3187         continue;
3188
3189      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
3190                          st->Ist.WrTmp.data->Iex.CCall.args,
3191                          &bb->stmts[0], i );
3192      if (!ex)
3193        /* the front end can't think of a suitable replacement */
3194        continue;
3195
3196      /* We got something better.  Install it in the bb. */
3197      any = True;
3198      bb->stmts[i]
3199         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
3200
3201      if (0) {
3202         vex_printf("SPEC: ");
3203         ppIRExpr(st->Ist.WrTmp.data);
3204         vex_printf("  -->  ");
3205         ppIRExpr(ex);
3206         vex_printf("\n");
3207      }
3208   }
3209
3210   if (any)
3211      bb = flatten_BB(bb);
3212   return bb;
3213}
3214
3215
3216/*---------------------------------------------------------------*/
3217/*--- Determination of guest state aliasing relationships     ---*/
3218/*---------------------------------------------------------------*/
3219
3220/* These are helper functions for CSE and GetI/PutI transformations.
3221
3222   Determine, to the extent possible, the relationship between two
3223   guest state accesses.  The possible outcomes are:
3224
3225   * Exact alias.  These two accesses denote precisely the same
3226     piece of the guest state.
3227
3228   * Definitely no alias.  These two accesses are guaranteed not to
3229     overlap any part of the guest state.
3230
3231   * Unknown -- if neither of the above can be established.
3232
3233   If in doubt, return Unknown.  */
3234
/* Result of comparing two guest state accesses; see the discussion
   immediately above for the precise meanings. */
typedef
   enum { ExactAlias,    /* precisely the same piece of guest state */
          NoAlias,       /* guaranteed not to overlap */
          UnknownAlias } /* can't tell; callers must assume the worst */
   GSAliasing;
3238
3239
3240/* Produces the alias relation between an indexed guest
3241   state access and a non-indexed access. */
3242
3243static
3244GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3245                                    Int offset2, IRType ty2 )
3246{
3247   UInt minoff1, maxoff1, minoff2, maxoff2;
3248
3249   getArrayBounds( descr1, &minoff1, &maxoff1 );
3250   minoff2 = offset2;
3251   maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3252
3253   if (maxoff1 < minoff2 || maxoff2 < minoff1)
3254      return NoAlias;
3255
3256   /* Could probably do better here if required.  For the moment
3257      however just claim not to know anything more. */
3258   return UnknownAlias;
3259}
3260
3261
/* Produces the alias relation between two indexed guest state
   accesses. */

static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   /* Shift both biases into non-negative territory in lockstep, so
      the %-based comparison below is well defined (C's % has
      implementation-unfriendly behaviour for negative operands). */
   iters = 0;
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, bias1 and bias2 are normalised into the range
      0 .. descr1->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
3327
3328
3329/*---------------------------------------------------------------*/
3330/*--- Common Subexpression Elimination                        ---*/
3331/*---------------------------------------------------------------*/
3332
3333/* Expensive in time and space. */
3334
/* Uses two environments:
   an IRTemp -> IRTemp mapping
   a mapping from AvailExpr* to IRTemp
*/
3339
/* A tagged union holding either a temporary or a constant; used to
   represent the atomic arguments of available expressions. */
typedef
   struct {
      enum { TCc, TCt } tag;   /* TCc: constant; TCt: temporary */
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
3346
3347static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3348{
3349   if (tc1->tag != tc2->tag)
3350      return False;
3351   switch (tc1->tag) {
3352      case TCc:
3353         return eqIRConst(tc1->u.con, tc2->u.con);
3354      case TCt:
3355         return tc1->u.tmp == tc2->u.tmp;
3356      default:
3357         vpanic("eqTmpOrConst");
3358   }
3359}
3360
3361static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3362{
3363   Bool eq = cee1->addr == cee2->addr;
3364   if (eq) {
3365      vassert(cee1->regparms == cee2->regparms);
3366      vassert(cee1->mcx_mask == cee2->mcx_mask);
3367      /* Names should be the same too, but we don't bother to
3368         check. */
3369   }
3370   return eq;
3371}
3372
3373/* Convert an atomic IRExpr* to a TmpOrConst. */
3374static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3375{
3376   switch (e->tag) {
3377      case Iex_RdTmp:
3378         tc->tag   = TCt;
3379         tc->u.tmp = e->Iex.RdTmp.tmp;
3380         break;
3381      case Iex_Const:
3382         tc->tag   = TCc;
3383         tc->u.con = e->Iex.Const.con;
3384         break;
3385      default:
3386         /* Getting here is a serious error.  It means that the
3387            presented arg isn't an IR atom, as it should be. */
3388         vpanic("irExpr_to_TmpOrConst");
3389   }
3390}
3391
3392/* Convert a TmpOrConst to an atomic IRExpr*. */
3393static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3394{
3395   switch (tc->tag) {
3396      case TCc: return IRExpr_Const(tc->u.con);
3397      case TCt: return IRExpr_RdTmp(tc->u.tmp);
3398      default:  vpanic("tmpOrConst_to_IRExpr");
3399   }
3400}
3401
3402/* Convert a NULL terminated IRExpr* vector to an array of
3403   TmpOrConsts, and a length. */
3404static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3405                                       /*OUT*/Int* nOuts,
3406                                       IRExpr** ins )
3407{
3408   Int i, n;
3409   /* We have to make two passes, one to count, one to copy. */
3410   for (n = 0; ins[n]; n++)
3411      ;
3412   *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3413   *nOuts = n;
3414   /* and now copy .. */
3415   for (i = 0; i < n; i++) {
3416      IRExpr*     arg = ins[i];
3417      TmpOrConst* dst = &(*outs)[i];
3418      irExpr_to_TmpOrConst(dst, arg);
3419   }
3420}
3421
/* An "available expression": a compact, comparable summary of the
   right-hand-side shapes which CSE is prepared to common up.  Each
   variant mirrors one IRExpr shape whose arguments are atoms.
   Tag naming scheme: U=unop, B=binop, I(tt)=ITE, C=const, followed
   by 't' (tmp) / 'c' (const) for each argument position. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall, Load
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) -- note the constant is held by value */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call; args are atoms stored as TmpOrConsts */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
         /* Load(end,ty,addr) -- only used when load-CSE is enabled */
         struct {
            IREndness  end;
            IRType     ty;
            TmpOrConst addr;
         } Load;
      } u;
   }
   AvailExpr;
3501
/* Structural equality on AvailExprs: same tag and all corresponding
   fields equal (temps by number, constants structurally, callees by
   address, call args element-wise). */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   /* Two arbitrary exprs being compared usually have differing tags,
      hence LIKELY on the mismatch path. */
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt:
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         /* Cheap checks (arg count, callee address) first, then the
            args themselves, element-wise. */
         Int  i, n;
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         /* Equal calls must have equal return types. */
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      case Load: {
         Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
                          && a1->u.Load.ty == a2->u.Load.ty
                          && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
         return eq;
      }
      default:
         vpanic("eq_AvailExpr");
   }
}
3575
/* Reconstruct a (flat) IRExpr from an AvailExpr.  Constants which
   the AvailExpr holds by value are copied into freshly allocated
   IRConsts, since the IRExpr constructors want IRConst pointers. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         /* Heap-allocate a copy of the by-value constant. */
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
                           IRExpr_RdTmp(ae->u.Ittt.e1),
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         /* Rebuild the NULL-terminated argument vector. */
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      case Load:
         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
3648
3649inline
3650static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3651{
3652   HWord res;
3653   /* env :: IRTemp -> IRTemp */
3654   if (lookupHHW( env, &res, (HWord)tmp ))
3655      return (IRTemp)res;
3656   else
3657      return tmp;
3658}
3659
3660inline
3661static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3662                                          HashHW* env )
3663{
3664   /* env :: IRTemp -> IRTemp */
3665   if (tc->tag == TCt) {
3666      tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3667   }
3668}
3669
3670static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
3671{
3672   /* env :: IRTemp -> IRTemp */
3673   switch (ae->tag) {
3674      case Ut:
3675         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
3676         break;
3677      case Btt:
3678         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
3679         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
3680         break;
3681      case Btc:
3682         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
3683         break;
3684      case Bct:
3685         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
3686         break;
3687      case Cf64i:
3688         break;
3689      case Ittt:
3690         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
3691         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
3692         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
3693         break;
3694      case Ittc:
3695         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
3696         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
3697         break;
3698      case Itct:
3699         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
3700         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
3701         break;
3702      case Itcc:
3703         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
3704         break;
3705      case GetIt:
3706         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
3707         break;
3708      case CCall: {
3709         Int i, n = ae->u.CCall.nArgs;;
3710         for (i = 0; i < n; i++) {
3711            subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
3712         }
3713         break;
3714      }
3715      case Load:
3716         subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
3717         break;
3718      default:
3719         vpanic("subst_AvailExpr");
3720   }
3721}
3722
/* Try to convert |e| into an AvailExpr, i.e. one of the CSEable
   forms: an operator applied to atoms, an F64i constant, an ITE on
   atoms, a GetI with a tmp index, a clean helper call, or -- only if
   |allowLoadsToBeCSEd| -- a Load from an atomic address.  Returns
   NULL when |e| has no such form, in which case it simply never
   becomes "available" and is not considered for CSE. */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         /* unop(tmp) -> Ut */
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         /* binop(tmp,tmp) -> Btt; binop(tmp,const) -> Btc;
            binop(const,tmp) -> Bct */
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               /* constant is stored by value in the AvailExpr */
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Bct;
            ae->u.Bct.op   = e->Iex.Binop.op;
            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
            return ae;
         }
         break;

      case Iex_Const:
         /* Only F64i constants are worth CSEing here. */
         if (e->Iex.Const.con->tag == Ico_F64i) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag          = Cf64i;
            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
            return ae;
         }
         break;

      case Iex_ITE:
         /* ITE(tmp, t|c, t|c) -> Ittt/Itct/Ittc/Itcc, by the shapes
            of the iftrue/iffalse arms. */
         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittt;
                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itct;
                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittc;
                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itcc;
                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
            }
         }
         break;

      case Iex_GetI:
         /* GetI(descr, tmp, bias) -> GetIt */
         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag           = GetIt;
            ae->u.GetIt.descr = e->Iex.GetI.descr;
            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
            ae->u.GetIt.bias  = e->Iex.GetI.bias;
            return ae;
         }
         break;

      case Iex_CCall:
         ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
         ae->tag = CCall;
         /* Ok to share only the cee, since it is immutable. */
         ae->u.CCall.cee   = e->Iex.CCall.cee;
         ae->u.CCall.retty = e->Iex.CCall.retty;
         /* irExprVec_to_TmpOrConsts will assert if the args are
            neither tmps nor constants, but that's ok .. that's all they
            should be. */
         irExprVec_to_TmpOrConsts(
                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
                                  e->Iex.CCall.args
                                 );
         return ae;

      case Iex_Load:
         /* If the caller of do_cse_BB has requested that loads also
            be CSEd, convert them into AvailExprs.  If not, we'll just
            return NULL here, and the load never becomes considered
            "available", which effectively disables CSEing of them, as
            desired. */
         if (allowLoadsToBeCSEd) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Load;
            ae->u.Load.end = e->Iex.Load.end;
            ae->u.Load.ty  = e->Iex.Load.ty;
            irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
            return ae;
         }
         break;

      default:
         break;
   }

   return NULL;
}
3864
3865
/* The BB is modified in-place.  Returns True if any changes were
   made.  The caller can choose whether or not loads should be CSEd.
   In the normal course of things we don't do that, since CSEing loads
   is something of a dodgy proposition if the guest program is doing
   some screwy stuff to do with races and spinloops. */

static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* IRTemps are stored as HWords in both hash tables; make sure
      that round-trip is lossless. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI and Load kinds.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      /* Scan aenv and drop any vulnerable (GetI/Load) bindings this
         statement might invalidate. */
      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            if (ae->tag != GetIt && ae->tag != Load)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (ae->tag == Load) {
                  /* Loads can be invalidated by anything that could
                     possibly touch memory.  But in that case we
                     should have |paranoia| == 2 and we won't get
                     here.  So there's nothing to do; we don't have to
                     invalidate the load. */
               }
               else
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
4030
4031
4032/*---------------------------------------------------------------*/
4033/*--- Add32/Sub32 chain collapsing                            ---*/
4034/*---------------------------------------------------------------*/
4035
4036/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4037
4038/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
4039   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
4040   root node is Add32, not Sub32. */
4041
4042static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
4043{
4044   if (e->tag != Iex_Binop)
4045      return False;
4046   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
4047      return False;
4048   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
4049      return False;
4050   if (e->Iex.Binop.arg2->tag != Iex_Const)
4051      return False;
4052   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
4053   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
4054   if (e->Iex.Binop.op == Iop_Sub32)
4055      *i32 = -*i32;
4056   return True;
4057}
4058
4059
4060/* Figure out if tmp can be expressed as tmp2 +32 const, for some
4061   other tmp2.  Scan backwards from the specified start point -- an
4062   optimisation. */
4063
4064static Bool collapseChain ( IRSB* bb, Int startHere,
4065                            IRTemp tmp,
4066                            IRTemp* tmp2, Int* i32 )
4067{
4068   Int     j, ii;
4069   IRTemp  vv;
4070   IRStmt* st;
4071   IRExpr* e;
4072
4073   /* the (var, con) pair contain the current 'representation' for
4074      'tmp'.  We start with 'tmp + 0'.  */
4075   IRTemp var = tmp;
4076   Int    con = 0;
4077
4078   /* Scan backwards to see if tmp can be replaced by some other tmp
4079     +/- a constant. */
4080   for (j = startHere; j >= 0; j--) {
4081      st = bb->stmts[j];
4082      if (st->tag != Ist_WrTmp)
4083         continue;
4084      if (st->Ist.WrTmp.tmp != var)
4085         continue;
4086      e = st->Ist.WrTmp.data;
4087      if (!isAdd32OrSub32(e, &vv, &ii))
4088         break;
4089      var = vv;
4090      con += ii;
4091   }
4092   if (j == -1)
4093      /* no earlier binding for var .. ill-formed IR */
4094      vpanic("collapseChain");
4095
4096   /* so, did we find anything interesting? */
4097   if (var == tmp)
4098      return False; /* no .. */
4099
4100   *tmp2 = var;
4101   *i32  = con;
4102   return True;
4103}
4104
4105
4106/* ------- Main function for Add32/Sub32 chain collapsing ------ */
4107
4108static void collapse_AddSub_chains_BB ( IRSB* bb )
4109{
4110   IRStmt *st;
4111   IRTemp var, var2;
4112   Int    i, con, con2;
4113
4114   for (i = bb->stmts_used-1; i >= 0; i--) {
4115      st = bb->stmts[i];
4116      if (st->tag == Ist_NoOp)
4117         continue;
4118
4119      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
4120
4121      if (st->tag == Ist_WrTmp
4122          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
4123
4124         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
4125            Find out if var can be expressed as var2 + con2. */
4126         if (collapseChain(bb, i-1, var, &var2, &con2)) {
4127            if (DEBUG_IROPT) {
4128               vex_printf("replacing1 ");
4129               ppIRStmt(st);
4130               vex_printf(" with ");
4131            }
4132            con2 += con;
4133            bb->stmts[i]
4134               = IRStmt_WrTmp(
4135                    st->Ist.WrTmp.tmp,
4136                    (con2 >= 0)
4137                      ? IRExpr_Binop(Iop_Add32,
4138                                     IRExpr_RdTmp(var2),
4139                                     IRExpr_Const(IRConst_U32(con2)))
4140                      : IRExpr_Binop(Iop_Sub32,
4141                                     IRExpr_RdTmp(var2),
4142                                     IRExpr_Const(IRConst_U32(-con2)))
4143                 );
4144            if (DEBUG_IROPT) {
4145               ppIRStmt(bb->stmts[i]);
4146               vex_printf("\n");
4147            }
4148         }
4149
4150         continue;
4151      }
4152
4153      /* Try to collapse 't1 = GetI[t2, con]'. */
4154
4155      if (st->tag == Ist_WrTmp
4156          && st->Ist.WrTmp.data->tag == Iex_GetI
4157          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
4158          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
4159                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
4160         if (DEBUG_IROPT) {
4161            vex_printf("replacing3 ");
4162            ppIRStmt(st);
4163            vex_printf(" with ");
4164         }
4165         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
4166         bb->stmts[i]
4167            = IRStmt_WrTmp(
4168                 st->Ist.WrTmp.tmp,
4169                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
4170                             IRExpr_RdTmp(var2),
4171                             con2));
4172         if (DEBUG_IROPT) {
4173            ppIRStmt(bb->stmts[i]);
4174            vex_printf("\n");
4175         }
4176         continue;
4177      }
4178
4179      /* Perhaps st is PutI[t, con] ? */
4180      IRPutI *puti = st->Ist.PutI.details;
4181      if (st->tag == Ist_PutI
4182          && puti->ix->tag == Iex_RdTmp
4183          && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
4184                               &var2, &con2)) {
4185         if (DEBUG_IROPT) {
4186            vex_printf("replacing2 ");
4187            ppIRStmt(st);
4188            vex_printf(" with ");
4189         }
4190         con2 += puti->bias;
4191         bb->stmts[i]
4192            = IRStmt_PutI(mkIRPutI(puti->descr,
4193                                   IRExpr_RdTmp(var2),
4194                                   con2,
4195                                   puti->data));
4196         if (DEBUG_IROPT) {
4197            ppIRStmt(bb->stmts[i]);
4198            vex_printf("\n");
4199         }
4200         continue;
4201      }
4202
4203   } /* for */
4204}
4205
4206
4207/*---------------------------------------------------------------*/
4208/*--- PutI/GetI transformations                               ---*/
4209/*---------------------------------------------------------------*/
4210
4211/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4212   the given starting point to find, if any, a PutI which writes
4213   exactly the same piece of guest state, and so return the expression
4214   that the PutI writes.  This is the core of PutI-GetI forwarding. */
4215
4216static
4217IRExpr* findPutI ( IRSB* bb, Int startHere,
4218                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
4219{
4220   Int        j;
4221   IRStmt*    st;
4222   GSAliasing relation;
4223
4224   if (0) {
4225      vex_printf("\nfindPutI ");
4226      ppIRRegArray(descrG);
4227      vex_printf(" ");
4228      ppIRExpr(ixG);
4229      vex_printf(" %d\n", biasG);
4230   }
4231
4232   /* Scan backwards in bb from startHere to find a suitable PutI
4233      binding for (descrG, ixG, biasG), if any. */
4234
4235   for (j = startHere; j >= 0; j--) {
4236      st = bb->stmts[j];
4237      if (st->tag == Ist_NoOp)
4238         continue;
4239
4240      if (st->tag == Ist_Put) {
4241         /* Non-indexed Put.  This can't give a binding, but we do
4242            need to check it doesn't invalidate the search by
4243            overlapping any part of the indexed guest state. */
4244
4245         relation
4246            = getAliasingRelation_IC(
4247                 descrG, ixG,
4248                 st->Ist.Put.offset,
4249                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
4250
4251         if (relation == NoAlias) {
4252            /* we're OK; keep going */
4253            continue;
4254         } else {
4255            /* relation == UnknownAlias || relation == ExactAlias */
4256            /* If this assertion fails, we've found a Put which writes
4257               an area of guest state which is read by a GetI.  Which
4258               is unlikely (although not per se wrong). */
4259            vassert(relation != ExactAlias);
4260            /* This Put potentially writes guest state that the GetI
4261               reads; we must fail. */
4262            return NULL;
4263         }
4264      }
4265
4266      if (st->tag == Ist_PutI) {
4267         IRPutI *puti = st->Ist.PutI.details;
4268
4269         relation = getAliasingRelation_II(
4270                       descrG, ixG, biasG,
4271                       puti->descr,
4272                       puti->ix,
4273                       puti->bias
4274                    );
4275
4276         if (relation == NoAlias) {
4277            /* This PutI definitely doesn't overlap.  Ignore it and
4278               keep going. */
4279            continue; /* the for j loop */
4280         }
4281
4282         if (relation == UnknownAlias) {
4283            /* We don't know if this PutI writes to the same guest
4284               state that the GetI, or not.  So we have to give up. */
4285            return NULL;
4286         }
4287
4288         /* Otherwise, we've found what we're looking for.  */
4289         vassert(relation == ExactAlias);
4290         return puti->data;
4291
4292      } /* if (st->tag == Ist_PutI) */
4293
4294      if (st->tag == Ist_Dirty) {
4295         /* Be conservative.  If the dirty call has any guest effects at
4296            all, give up.  We could do better -- only give up if there
4297            are any guest writes/modifies. */
4298         if (st->Ist.Dirty.details->nFxState > 0)
4299            return NULL;
4300      }
4301
4302   } /* for */
4303
4304   /* No valid replacement was found. */
4305   return NULL;
4306}
4307
4308
4309
4310/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4311   that it writes exactly the same piece of guest state) ?  Safe
4312   answer: False. */
4313
4314static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4315{
4316   vassert(pi->tag == Ist_PutI);
4317   if (s2->tag != Ist_PutI)
4318      return False;
4319
4320   IRPutI *p1 = pi->Ist.PutI.details;
4321   IRPutI *p2 = s2->Ist.PutI.details;
4322
4323   return toBool(
4324          getAliasingRelation_II(
4325             p1->descr, p1->ix, p1->bias,
4326             p2->descr, p2->ix, p2->bias
4327          )
4328          == ExactAlias
4329          );
4330}
4331
4332
4333/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4334   overlap it?  Safe answer: True.  Note, we could do a lot better
4335   than this if needed. */
4336
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* NOTE(review): minoffP/maxoffP are computed here but never
      consulted below -- looks like a leftover; confirm before
      removing. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* These touch no guest state at all. */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         /* Plain Put: compare its (offset, type) footprint against
            the PutI's indexed footprint. */
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         /* Indexed Put: compare the two indexed footprints. */
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only Get/GetI on the rhs can read guest state. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* A memory store; doesn't touch guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   /* Only a proven non-overlap lets the caller keep scanning. */
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
4436
4437
4438
4439/* ---------- PutI/GetI transformations main functions --------- */
4440
4441/* Remove redundant GetIs, to the extent that they can be detected.
4442   bb is modified in-place. */
4443
4444static
4445void do_redundant_GetI_elimination ( IRSB* bb )
4446{
4447   Int     i;
4448   IRStmt* st;
4449
4450   for (i = bb->stmts_used-1; i >= 0; i--) {
4451      st = bb->stmts[i];
4452      if (st->tag == Ist_NoOp)
4453         continue;
4454
4455      if (st->tag == Ist_WrTmp
4456          && st->Ist.WrTmp.data->tag == Iex_GetI
4457          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4458         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4459         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
4460         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
4461         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
4462         if (replacement
4463             && isIRAtom(replacement)
4464             /* Make sure we're doing a type-safe transformation! */
4465             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4466            if (DEBUG_IROPT) {
4467               vex_printf("rGI:  ");
4468               ppIRExpr(st->Ist.WrTmp.data);
4469               vex_printf(" -> ");
4470               ppIRExpr(replacement);
4471               vex_printf("\n");
4472            }
4473            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4474         }
4475      }
4476   }
4477
4478}
4479
4480
/* Remove redundant PutIs, to the extent that they can be detected.
   bb is modified in-place. */
4483
4484static
4485void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
4486{
4487   Int    i, j;
4488   Bool   delete;
4489   IRStmt *st, *stj;
4490
4491   vassert(pxControl < VexRegUpdAllregsAtEachInsn);
4492
4493   for (i = 0; i < bb->stmts_used; i++) {
4494      st = bb->stmts[i];
4495      if (st->tag != Ist_PutI)
4496         continue;
4497      /* Ok, search forwards from here to see if we can find another
4498         PutI which makes this one redundant, and dodging various
4499         hazards.  Search forwards:
4500         * If conditional exit, give up (because anything after that
4501           does not postdominate this put).
4502         * If a Get which might overlap, give up (because this PutI
4503           not necessarily dead).
4504         * If a Put which is identical, stop with success.
4505         * If a Put which might overlap, but is not identical, give up.
4506         * If a dirty helper call which might write guest state, give up.
4507         * If a Put which definitely doesn't overlap, or any other
4508           kind of stmt, continue.
4509      */
4510      delete = False;
4511      for (j = i+1; j < bb->stmts_used; j++) {
4512         stj = bb->stmts[j];
4513         if (stj->tag == Ist_NoOp)
4514            continue;
4515         if (identicalPutIs(st, stj)) {
4516            /* success! */
4517            delete = True;
4518            break;
4519         }
4520         if (stj->tag == Ist_Exit)
4521            /* give up */
4522            break;
4523         if (st->tag == Ist_Dirty)
4524            /* give up; could do better here */
4525            break;
4526         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4527            /* give up */
4528           break;
4529      }
4530
4531      if (delete) {
4532         if (DEBUG_IROPT) {
4533            vex_printf("rPI:  ");
4534            ppIRStmt(st);
4535            vex_printf("\n");
4536         }
4537         bb->stmts[i] = IRStmt_NoOp();
4538      }
4539
4540   }
4541}
4542
4543
4544/*---------------------------------------------------------------*/
4545/*--- Loop unrolling                                          ---*/
4546/*---------------------------------------------------------------*/
4547
4548/* Adjust all tmp values (names) in e by delta.  e is destructively
4549   modified. */
4550
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         /* The only expression kind that directly names a tmp. */
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         /* Leaves: contain no tmps. */
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         /* Recurse into every operand. */
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         /* args is a NULL-terminated array. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      default:
         /* Unhandled expression kind: dump it and die. */
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
4599
4600/* Adjust all tmp values (names) in st by delta.  st is destructively
4601   modified. */
4602
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No tmps in these. */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* Shift both the tmp being written and any tmps read. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         /* dst is an IRConst, not an expression; only the guard can
            mention tmps. */
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* dst is a bare tmp, not an expression; shift it directly. */
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* The ...Hi fields may be absent (single-width CAS); guard
            before shifting/recursing, as the checks below do. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata is NULL for the load-linked form. */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* Skip the special VECRET/BBPTR marker args. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         /* Unhandled statement kind: dump it and die. */
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
4685
4686
4687/* If possible, return a loop-unrolled version of bb0.  The original
4688   is changed.  If not possible, return NULL.  */
4689
4690/* The two schemas considered are:
4691
4692     X: BODY; goto X
4693
4694     which unrolls to (eg)  X: BODY;BODY; goto X
4695
4696   and
4697
4698       X: BODY; if (c) goto X; goto Y
4699   which trivially transforms to
4700       X: BODY; if (!c) goto Y; goto X;
4701   so it falls in the scope of the first case.
4702
4703   X and Y must be literal (guest) addresses.
4704*/
4705
4706static Int calc_unroll_factor( IRSB* bb )
4707{
4708   Int n_stmts, i;
4709
4710   n_stmts = 0;
4711   for (i = 0; i < bb->stmts_used; i++) {
4712      if (bb->stmts[i]->tag != Ist_NoOp)
4713         n_stmts++;
4714   }
4715
4716   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4717      if (vex_control.iropt_verbosity > 0)
4718         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4719                    n_stmts, 8* n_stmts);
4720      return 8;
4721   }
4722   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4723      if (vex_control.iropt_verbosity > 0)
4724         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4725                    n_stmts, 4* n_stmts);
4726      return 4;
4727   }
4728
4729   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4730      if (vex_control.iropt_verbosity > 0)
4731         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4732                    n_stmts, 2* n_stmts);
4733      return 2;
4734   }
4735
4736   if (vex_control.iropt_verbosity > 0)
4737      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4738
4739   return 1;
4740}
4741
4742
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* A non-positive threshold disables unrolling entirely. */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   /* Only plain (boring) jumps can form the loop back-edge. */
   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last stmt in the block (skipping NULL entries). */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   /* xxx_value now becomes the conditional-exit target. */
   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Each pass of the loop below doubles bb1, so jmax passes yield
      2^jmax copies of the original body. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      /* Clone bb1, then extend bb1's type env with a fresh tmp for
         every tmp in the clone; the clone's tmps are then shifted by
         n_vars so they refer to the fresh ones. */
      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%lx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
4913
4914
4915/*---------------------------------------------------------------*/
4916/*--- The tree builder                                        ---*/
4917/*---------------------------------------------------------------*/
4918
4919/* This isn't part of IR optimisation.  Really it's a pass done prior
4920   to instruction selection, which improves the code that the
4921   instruction selector can produce. */
4922
4923/* --- The 'tmp' environment is the central data structure here --- */
4924
/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  A modest size (currently 10,
   per A_NENV below) gives reasonable performance under most
   circumstances. */
4930#define A_NENV 10
4931
4932/* An interval. Used to record the bytes in the guest state accessed
4933   by a Put[I] statement or by (one or more) Get[I] expression(s). In
4934   case of several Get[I] expressions, the lower/upper bounds are recorded.
4935   This is conservative but cheap.
4936   E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
4937   E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
4938   offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;  /* True iff low/high hold a valid interval */
      Int  low;      /* inclusive lower bound (guest state byte offset) */
      Int  high;     /* inclusive upper bound (guest state byte offset) */
   }
   Interval;
4946
4947static inline Bool
4948intervals_overlap(Interval i1, Interval i2)
4949{
4950   return (i1.low >= i2.low && i1.low <= i2.high) ||
4951          (i2.low >= i1.low && i2.low <= i1.high);
4952}
4953
4954static inline void
4955update_interval(Interval *i, Int low, Int high)
4956{
4957   vassert(low <= high);
4958
4959   if (i->present) {
4960      if (low  < i->low)  i->low  = low;
4961      if (high > i->high) i->high = high;
4962   } else {
4963      i->present = True;
4964      i->low  = low;
4965      i->high = high;
4966   }
4967}
4968
4969
4970/* bindee == NULL   ===  slot is not in use
4971   bindee != NULL   ===  slot is in use
4972*/
typedef
   struct {
      IRTemp  binder;    /* the tmp bound by this slot */
      IRExpr* bindee;    /* what it is bound to; NULL => slot not in use */
      Bool    doesLoad;  /* does bindee read memory? */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
4982
4983__attribute__((unused))
4984static void ppAEnv ( ATmpInfo* env )
4985{
4986   Int i;
4987   for (i = 0; i < A_NENV; i++) {
4988      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
4989      if (env[i].bindee)
4990         ppIRExpr(env[i].bindee);
4991      else
4992         vex_printf("(null)");
4993      vex_printf("\n");
4994   }
4995}
4996
4997/* --- Tree-traversal fns --- */
4998
4999/* Traverse an expr, and detect if any part of it reads memory or does
5000   a Get.  Be careful ... this really controls how much the
5001   tree-builder can reorder the code, so getting it right is critical.
5002*/
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         /* args is a NULL-terminated array; scan every argument. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         /* A memory read: set the load hint, then scan the address
            expression too. */
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         /* Fixed-offset guest-state read of a known size. */
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         /* Indexed read: conservatively record the whole array as
            accessed, since the index is not known here. */
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         /* Neither reads memory nor guest state. */
         return;
      default:
         /* Unhandled expression kind: dump it and die. */
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
5061
5062
5063/* Add a binding to the front of the env and slide all the rest
5064   backwards.  It should be the case that the last slot is free. */
5065static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
5066{
5067   Int i;
5068   vassert(env[A_NENV-1].bindee == NULL);
5069   for (i = A_NENV-1; i >= 1; i--)
5070      env[i] = env[i-1];
5071   env[0].binder   = binder;
5072   env[0].bindee   = bindee;
5073   env[0].doesLoad = False; /* filled in later */
5074   env[0].getInterval.present  = False; /* filled in later */
5075   env[0].getInterval.low  = -1; /* filled in later */
5076   env[0].getInterval.high = -1; /* filled in later */
5077}
5078
5079/* Given uses :: array of UShort, indexed by IRTemp
5080   Add the use-occurrences of temps in this expression
5081   to the env.
5082*/
5083static void aoccCount_Expr ( UShort* uses, IRExpr* e )
5084{
5085   Int i;
5086
5087   switch (e->tag) {
5088
5089      case Iex_RdTmp: /* the only interesting case */
5090         uses[e->Iex.RdTmp.tmp]++;
5091         return;
5092
5093      case Iex_ITE:
5094         aoccCount_Expr(uses, e->Iex.ITE.cond);
5095         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
5096         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
5097         return;
5098
5099      case Iex_Qop:
5100         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
5101         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
5102         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
5103         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
5104         return;
5105
5106      case Iex_Triop:
5107         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
5108         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
5109         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
5110         return;
5111
5112      case Iex_Binop:
5113         aoccCount_Expr(uses, e->Iex.Binop.arg1);
5114         aoccCount_Expr(uses, e->Iex.Binop.arg2);
5115         return;
5116
5117      case Iex_Unop:
5118         aoccCount_Expr(uses, e->Iex.Unop.arg);
5119         return;
5120
5121      case Iex_Load:
5122         aoccCount_Expr(uses, e->Iex.Load.addr);
5123         return;
5124
5125      case Iex_CCall:
5126         for (i = 0; e->Iex.CCall.args[i]; i++)
5127            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
5128         return;
5129
5130      case Iex_GetI:
5131         aoccCount_Expr(uses, e->Iex.GetI.ix);
5132         return;
5133
5134      case Iex_Const:
5135      case Iex_Get:
5136         return;
5137
5138      default:
5139         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5140         vpanic("aoccCount_Expr");
5141    }
5142}
5143
5144
/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this statement
   to the env.
*/
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   /* Per-tag dispatch: for each statement kind, count temps in every
      IRExpr it contains by delegating to aoccCount_Expr. */
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         /* The Hi halves are optional (NULL for single-width CAS). */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         /* storedata is NULL for a Load-Linked. */
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         /* mAddr is only meaningful when the helper declares a memory
            effect.  VECRET/BBPTR arg placeholders contain no temps, so
            skip them. */
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These contain no expressions, hence no temps. */
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
5225
5226/* Look up a binding for tmp in the env.  If found, return the bound
5227   expression, and set the env's binding to NULL so it is marked as
5228   used.  If not found, return NULL. */
5229
5230static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5231{
5232   Int i;
5233   for (i = 0; i < A_NENV; i++) {
5234      if (env[i].binder == tmp && env[i].bindee != NULL) {
5235         IRExpr* bindee = env[i].bindee;
5236         env[i].bindee = NULL;
5237         return bindee;
5238      }
5239   }
5240   return NULL;
5241}
5242
5243/* Traverse e, looking for temps.  For each observed temp, see if env
5244   contains a binding for the temp, and if so return the bound value.
5245   The env has the property that any binding it holds is
5246   'single-shot', so once a binding is used, it is marked as no longer
5247   available, by setting its .bindee field to NULL. */
5248
5249static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5250   return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5251}
5252static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5253   return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5254}
5255
5256static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
5257{
5258   switch (op) {
5259   case Iop_Or32:
5260      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
5261      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
5262         return IRExpr_Unop( Iop_CmpwNEZ32,
5263                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
5264                                                     a2->Iex.Unop.arg ) );
5265      break;
5266
5267   case Iop_CmpNE32:
5268      /* Since X has type Ity_I1 we can simplify:
5269         CmpNE32(1Uto32(X),0)) ==> X */
5270      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
5271         return a1->Iex.Unop.arg;
5272      break;
5273
5274   default:
5275      break;
5276   }
5277   /* no reduction rule applies */
5278   return IRExpr_Binop( op, a1, a2 );
5279}
5280
/* Opportunistically simplify Unop(op, aa) using an ordered set of
   local rewrite rules (each documented inline).  Rules are tried in
   the order written, within the matching 'op' case.  If none applies,
   the Unop node is simply (re)built.  Some rules recurse so that a
   freshly created node can itself be reduced further. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_Left64:
      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
      break;
   case Iop_ZeroHI64ofV128:
      /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
      if (is_Unop(aa, Iop_ZeroHI64ofV128))
         return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
                     --> Shr64( 32Uto64(64to32(x)), sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
         --> 32Uto64(64to32( Shl64(                x,   sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
5429
/* Rebuild 'e' bottom-up, replacing each IRTemp that has a binding in
   'env' with its bound expression.  Bindings are single-shot: the
   lookup (atbSubst_Temp) clears a binding when it is used.  Binop and
   Unop results are additionally run through the local folding rules
   above (fold_IRExpr_Binop / fold_IRExpr_Unop). */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         /* Shallow-copy the arg vector so the original node is not
            mutated, then substitute each argument in place. */
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         /* The interesting case: consume a binding if one exists,
            otherwise keep the temp as-is. */
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
                );
      case Iex_Binop:
         /* Substitute, then try local simplification. */
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         /* Leaves with no temps: return unchanged. */
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
5502
/* Same deal as atbSubst_Expr, except for stmts: rebuild 'st' with
   every contained expression passed through atbSubst_Expr, thereby
   "using up" any env bindings the statement's expressions refer to.
   The rebuilt statement is a fresh node; 'st' itself is not mutated
   (except that Dirty args vectors are updated on a copied IRDirty). */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* The Hi halves are optional (NULL for single-width CAS). */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         /* storedata is NULL for a Load-Linked. */
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Copy the IRDirty node, then substitute in place on the
            copy.  VECRET/BBPTR placeholder args contain no temps and
            are left alone. */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
5609
5610inline
5611static Bool dirty_helper_stores ( const IRDirty *d )
5612{
5613   return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
5614}
5615
5616inline
5617static Interval dirty_helper_puts (
5618                   const IRDirty *d,
5619                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5620                   VexRegisterUpdates pxControl,
5621                   /*OUT*/Bool *requiresPreciseMemExns
5622                )
5623{
5624   Int i;
5625   Interval interval;
5626
5627   /* Passing the guest state pointer opens the door to modifying the
5628      guest state under the covers.  It's not allowed, but let's be
5629      extra conservative and assume the worst. */
5630   for (i = 0; d->args[i]; i++) {
5631      if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
5632         *requiresPreciseMemExns = True;
5633         /* Assume all guest state is written. */
5634         interval.present = True;
5635         interval.low  = 0;
5636         interval.high = 0x7FFFFFFF;
5637         return interval;
5638      }
5639   }
5640
5641   /* Check the side effects on the guest state */
5642   interval.present = False;
5643   interval.low = interval.high = -1;
5644   *requiresPreciseMemExns = False;
5645
5646   for (i = 0; i < d->nFxState; ++i) {
5647      if (d->fxState[i].fx != Ifx_Read) {
5648         Int offset = d->fxState[i].offset;
5649         Int size = d->fxState[i].size;
5650         Int nRepeats = d->fxState[i].nRepeats;
5651         Int repeatLen = d->fxState[i].repeatLen;
5652
5653         if (preciseMemExnsFn(offset,
5654                              offset + nRepeats * repeatLen + size - 1,
5655                              pxControl)) {
5656            *requiresPreciseMemExns = True;
5657         }
5658         update_interval(&interval, offset,
5659                         offset + nRepeats * repeatLen + size - 1);
5660      }
5661   }
5662
5663   return interval;
5664}
5665
5666/* Return an interval if st modifies the guest state.  Via
5667   requiresPreciseMemExns return whether or not that modification
5668   requires precise exceptions. */
5669static Interval stmt_modifies_guest_state (
5670                   IRSB *bb, const IRStmt *st,
5671                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5672                   VexRegisterUpdates pxControl,
5673                   /*OUT*/Bool *requiresPreciseMemExns
5674                )
5675{
5676   Interval interval;
5677
5678   switch (st->tag) {
5679   case Ist_Put: {
5680      Int offset = st->Ist.Put.offset;
5681      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
5682
5683      *requiresPreciseMemExns
5684         = preciseMemExnsFn(offset, offset + size - 1, pxControl);
5685      interval.present = True;
5686      interval.low  = offset;
5687      interval.high = offset + size - 1;
5688      return interval;
5689   }
5690
5691   case Ist_PutI: {
5692      IRRegArray *descr = st->Ist.PutI.details->descr;
5693      Int offset = descr->base;
5694      Int size = sizeofIRType(descr->elemTy);
5695
5696      /* We quietly assume here that all segments are contiguous and there
5697         are no holes. This is to avoid a loop. The assumption is conservative
5698         in the sense that we might report that precise memory exceptions are
5699         needed when in fact they are not. */
5700      *requiresPreciseMemExns
5701         = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
5702                            pxControl);
5703      interval.present = True;
5704      interval.low  = offset;
5705      interval.high = offset + descr->nElems * size - 1;
5706      return interval;
5707   }
5708
5709   case Ist_Dirty:
5710      return dirty_helper_puts(st->Ist.Dirty.details,
5711                               preciseMemExnsFn, pxControl,
5712                               requiresPreciseMemExns);
5713
5714   default:
5715      *requiresPreciseMemExns = False;
5716      interval.present = False;
5717      interval.low  = -1;
5718      interval.high = -1;
5719      return interval;
5720   }
5721}
5722
/* Build expression trees in 'bb', in place: a binding 't = E' whose
   temp is used exactly once is deleted from the statement list, held
   in a small env, and substituted in at t's unique use point -- unless
   an intervening statement forces the binding to be flushed first (see
   the invalidation rules below).  Also computes the maximum guest
   address covered by the block's IMarks; returns that address, or
   ~(Addr)0 if the block contains no IMark. */
/* notstatic */ Addr ado_treebuild_BB (
                        IRSB* bb,
                        Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                        VexRegisterUpdates pxControl
                     )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Bool   max_ga_known = False;
   Addr   max_ga       = 0;

   /* Use counts per temp, filled in by Phase 1 below. */
   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            /* Last guest byte covered by this insn; guard against a
               (bogus) zero-length IMark. */
            UInt len = st->Ist.IMark.len;
            Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
              (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
              by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
	    if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (sidely safe @ True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns
                    );

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      /* Walk the env oldest-to-youngest (high index to low), flushing
         any binding this stmt invalidates. */
      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data. That means, in essense, that
                 a load expression cannot be substituted into a statement
                 that follows the put. But there is nothing wrong doing so
                 except when the put statement requries precise exceptions.
                 Think of a load that is moved past a put where the put
                 updates the IP in the guest state. If the load generates
                 a segfault, the wrong address (line number) would be
                 reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
	 }
      }
      /* note: 'm' deliberately carries over from the copy loop above;
         this clears the now-unused tail of the env. */
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr)0;
}
5950
5951
5952/*---------------------------------------------------------------*/
5953/*--- MSVC specific transformation hacks                      ---*/
5954/*---------------------------------------------------------------*/
5955
5956/* The purpose of all this is to find MSVC's idiom for non-constant
5957   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
5960   doesn't understand that XORing an undefined bit with itself gives a
5961   defined result.
5962
5963   This isn't a problem for the simple case "x ^ x", because iropt
5964   folds it to a constant zero before Memcheck ever sees it.  But in
5965   this case we have an intervening "& c" which defeats the simple
5966   case.  So we have to carefully inspect all expressions rooted at an
5967   XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
5968   7 other variants resulting from swapping the order of arguments to
5969   the three binary operations.  If we get a match, we then replace
5970   the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
5971
5972   The key difficulty is to spot the two uses of "a".  To normalise
5973   the IR to maximise the chances of success, we first do a CSE pass,
5974   with CSEing of loads enabled, since the two "a" expressions may be
5975   loads, which need to be commoned up.  Then we do a constant folding
5976   pass, so as to remove any tmp-to-tmp assignment chains that would
5977   make matching the original expression more difficult.
5978*/
5979
5980
5981/* Helper function for debug printing */
5982__attribute__((unused))
5983static void print_flat_expr ( IRExpr** env, IRExpr* e )
5984{
5985   if (e == NULL) {
5986      vex_printf("?");
5987      return;
5988   }
5989   switch (e->tag) {
5990      case Iex_Binop: {
5991         ppIROp(e->Iex.Binop.op);
5992         vex_printf("(");
5993         print_flat_expr(env, e->Iex.Binop.arg1);
5994         vex_printf(",");
5995         print_flat_expr(env, e->Iex.Binop.arg2);
5996         vex_printf(")");
5997         break;
5998      }
5999      case Iex_Unop: {
6000         ppIROp(e->Iex.Unop.op);
6001         vex_printf("(");
6002         print_flat_expr(env, e->Iex.Unop.arg);
6003         vex_printf(")");
6004         break;
6005      }
6006      case Iex_RdTmp:
6007         ppIRTemp(e->Iex.RdTmp.tmp);
6008         vex_printf("=");
6009         print_flat_expr(env, chase(env, e));
6010         break;
6011      case Iex_Const:
6012      case Iex_CCall:
6013      case Iex_Load:
6014      case Iex_ITE:
6015      case Iex_Get:
6016         ppIRExpr(e);
6017         break;
6018      default:
6019         vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
6020         vassert(0);
6021   }
6022}
6023
/* Spot   a ^ ((a ^ b) & c)   for a,b and c tmp-or-const (atoms)
   or any of the other 7 variants generated by switching the order
   of arguments to the outer ^, the inner ^ and the &.

   On a match, the three atoms are written to *aa, *bb and *cc and
   the variant number (1 .. 8) is returned.  On no match, 0 is
   returned and the OUT parameters are not written.  |env| maps tmps
   to their defining expressions, so the matcher can step through a
   single tmp->expr binding (via STEP) when looking for the inner
   AND and XOR nodes.
*/
static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     /*OUT*/IRExpr** cc,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)
{
   /* ISBIN(_e,_op): |_e| is non-NULL and is a Binop with exactly |_op|.
      STEP(_e):      chase |_e| through at most one tmp binding in env.
      LL/RR(_e):     first/second argument of a Binop. */
#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
                              && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)    isIRAtom(_e)
#  define STEP(_e)      chase1(env, (_e))
#  define LL(_e)        ((_e)->Iex.Binop.arg1)
#  define RR(_e)        ((_e)->Iex.Binop.arg2)

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                        -----and------ */
   /*                         --xor---      */
   /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = LL(e);
   and  = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v34;
   xor  = STEP(LL(and));
   c    = RR(and);
   if (!ISBIN(xor, opXOR)) goto v34;
   a2bL = LL(xor);
   a2bR = RR(xor);

   /* Require that exactly one side of the inner XOR equals a1, so
      the roles of "a" and "b" are unambiguous. */
   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 1;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 2;
   }

  v34:
   /*                   -----and------      */
   /*                    --xor---           */
   /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = RR(e);
   and  = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto v56;
   xor  = STEP(LL(and));
   c    = RR(and);
   if (!ISBIN(xor, opXOR)) goto v56;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 3;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 4;
   }

  v56:
   /*                        -----and------ */
   /*                         --xor---      */
   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = LL(e);
   and  = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v78;
   xor  = STEP(RR(and));
   c    = LL(and);
   if (!ISBIN(xor, opXOR)) goto v78;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      /* vassert(5-5) is vassert(0): deliberately fail if this variant
         ever matches, since it apparently has not been seen in practice
         ("ATC" -- presumably "ain't tested code"; confirm). */
      vassert(5-5); // ATC
      return 5;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      /* As above: vassert(6-6) is vassert(0), an untested-path trap. */
      vassert(6-6); // ATC
      return 6;
   }

 v78:
   /*                   -----and------      */
   /*                    --xor---           */
   /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = RR(e);
   and  = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto fail;
   xor  = STEP(RR(and));
   c    = LL(and);
   if (!ISBIN(xor, opXOR)) goto fail;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 7;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 8;
   }

 fail:
   return 0;

#  undef ISBIN
#  undef ISATOM
#  undef STEP
#  undef LL
#  undef RR
}
6171
6172/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
6173   variants thereof generated by switching arguments around), return
6174   the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
6175static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
6176{
6177   if (e->tag != Iex_Binop)
6178      return NULL;
6179
6180   const HChar* tyNm = NULL;
6181   IROp   opOR  = Iop_INVALID;
6182   IROp   opAND = Iop_INVALID;
6183   IROp   opNOT = Iop_INVALID;
6184   IROp   opXOR = Iop_INVALID;
6185   switch (e->Iex.Binop.op) {
6186      case Iop_Xor32:
6187         tyNm  = "I32";
6188         opOR  = Iop_Or32;  opAND = Iop_And32;
6189         opNOT = Iop_Not32; opXOR = Iop_Xor32;
6190         break;
6191      case Iop_Xor16:
6192         tyNm  = "I16";
6193         opOR  = Iop_Or16;  opAND = Iop_And16;
6194         opNOT = Iop_Not16; opXOR = Iop_Xor16;
6195         break;
6196      case Iop_Xor8:
6197         tyNm  = "I8";
6198         opOR  = Iop_Or8;  opAND = Iop_And8;
6199         opNOT = Iop_Not8; opXOR = Iop_Xor8;
6200         break;
6201      default:
6202         return NULL;
6203   }
6204
6205   IRExpr* aa = NULL;
6206   IRExpr* bb = NULL;
6207   IRExpr* cc = NULL;
6208   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
6209   if (variant > 0) {
6210      static UInt ctr = 0;
6211      if (0)
6212         vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
6213                    "type %s, variant %u\n",
6214                    ++ctr, tyNm, variant);
6215      /* It's vitally important that the returned aa, bb and cc are
6216         atoms -- either constants or tmps.  If it's anything else
6217         (eg, a GET) then incorporating them in a tree at this point
6218         in the SB may erroneously pull them forwards (eg of a PUT
6219         that originally was after the GET) and so transform the IR
6220         wrongly.  spotBitfieldAssignment should guarantee only to
6221         give us atoms, but we check here anyway. */
6222      vassert(aa && isIRAtom(aa));
6223      vassert(bb && isIRAtom(bb));
6224      vassert(cc && isIRAtom(cc));
6225      return IRExpr_Binop(
6226                opOR,
6227                IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
6228                IRExpr_Binop(opAND, bb, cc)
6229             );
6230   }
6231   return NULL;
6232}
6233
6234
6235/* SB is modified in-place.  Visit all the IRExprs and, for those
6236   which are allowed to be non-atomic, perform the XOR transform if
6237   possible.  This makes |sb| be non-flat, but that's ok, the caller
6238   can re-flatten it.  Returns True iff any changes were made. */
6239static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
6240{
6241   Int  i;
6242   Bool changed = False;
6243
6244   /* Make the tmp->expr environment, so we can use it for
6245      chasing expressions. */
6246   Int      n_tmps = sb->tyenv->types_used;
6247   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
6248   for (i = 0; i < n_tmps; i++)
6249      env[i] = NULL;
6250
6251   for (i = 0; i < sb->stmts_used; i++) {
6252      IRStmt* st = sb->stmts[i];
6253      if (st->tag != Ist_WrTmp)
6254         continue;
6255      IRTemp t = st->Ist.WrTmp.tmp;
6256      vassert(t >= 0 && t < n_tmps);
6257      env[t] = st->Ist.WrTmp.data;
6258   }
6259
6260   for (i = 0; i < sb->stmts_used; i++) {
6261      IRStmt* st = sb->stmts[i];
6262
6263      switch (st->tag) {
6264         case Ist_AbiHint:
6265            vassert(isIRAtom(st->Ist.AbiHint.base));
6266            vassert(isIRAtom(st->Ist.AbiHint.nia));
6267            break;
6268         case Ist_Put:
6269            vassert(isIRAtom(st->Ist.Put.data));
6270            break;
6271         case Ist_PutI: {
6272            IRPutI* puti = st->Ist.PutI.details;
6273            vassert(isIRAtom(puti->ix));
6274            vassert(isIRAtom(puti->data));
6275            break;
6276         }
6277         case Ist_WrTmp: {
6278            /* This is the one place where an expr (st->Ist.WrTmp.data) is
6279               allowed to be more than just a constant or a tmp. */
6280            IRExpr* mb_new_data
6281               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
6282            if (mb_new_data) {
6283               //ppIRSB(sb);
6284               st->Ist.WrTmp.data = mb_new_data;
6285               //ppIRSB(sb);
6286               changed = True;
6287            }
6288            break;
6289         }
6290         case Ist_Store:
6291            vassert(isIRAtom(st->Ist.Store.addr));
6292            vassert(isIRAtom(st->Ist.Store.data));
6293            break;
6294         case Ist_StoreG: {
6295            IRStoreG* sg = st->Ist.StoreG.details;
6296            vassert(isIRAtom(sg->addr));
6297            vassert(isIRAtom(sg->data));
6298            vassert(isIRAtom(sg->guard));
6299            break;
6300         }
6301         case Ist_LoadG: {
6302            IRLoadG* lg = st->Ist.LoadG.details;
6303            vassert(isIRAtom(lg->addr));
6304            vassert(isIRAtom(lg->alt));
6305            vassert(isIRAtom(lg->guard));
6306            break;
6307         }
6308         case Ist_CAS: {
6309            IRCAS* cas = st->Ist.CAS.details;
6310            vassert(isIRAtom(cas->addr));
6311            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
6312            vassert(isIRAtom(cas->expdLo));
6313            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
6314            vassert(isIRAtom(cas->dataLo));
6315            break;
6316         }
6317         case Ist_LLSC:
6318            vassert(isIRAtom(st->Ist.LLSC.addr));
6319            if (st->Ist.LLSC.storedata)
6320               vassert(isIRAtom(st->Ist.LLSC.storedata));
6321            break;
6322         case Ist_Dirty: {
6323            IRDirty* d = st->Ist.Dirty.details;
6324            if (d->mFx != Ifx_None) {
6325               vassert(isIRAtom(d->mAddr));
6326            }
6327            vassert(isIRAtom(d->guard));
6328            for (Int j = 0; d->args[j]; j++) {
6329               IRExpr* arg = d->args[j];
6330               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
6331                  vassert(isIRAtom(arg));
6332               }
6333            }
6334            break;
6335         }
6336         case Ist_IMark:
6337         case Ist_NoOp:
6338         case Ist_MBE:
6339            break;
6340         case Ist_Exit:
6341            vassert(isIRAtom(st->Ist.Exit.guard));
6342            break;
6343         default:
6344            vex_printf("\n"); ppIRStmt(st);
6345            vpanic("do_XOR_TRANSFORMS_IRSB");
6346      }
6347   }
6348
6349   vassert(isIRAtom(sb->next));
6350   return changed;
6351}
6352
6353
6354static IRSB* do_MSVC_HACKS ( IRSB* sb )
6355{
6356   // Normalise as much as we can.  This is the one-and-only place
6357   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
6358   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
6359   if (any_cse_changes) {
6360      // CSEing might have created dead code.  Remove it.
6361      sb = cprop_BB ( sb );
6362      do_deadcode_BB(sb);
6363   }
6364
6365   // Visit all atoms, do the transformation proper.  bb is modified
6366   // in-place.
6367   Bool changed = do_XOR_TRANSFORM_IRSB(sb);
6368
6369   if (changed) {
6370      // The transformation generates non-flat expressions, so we now
6371      // need to re-flatten the block.
6372      sb = flatten_BB(sb);
6373   }
6374
6375   return sb;
6376}
6377
6378
6379/*---------------------------------------------------------------*/
6380/*--- iropt main                                              ---*/
6381/*---------------------------------------------------------------*/
6382
6383static Bool iropt_verbose = False; /* True; */
6384
6385
6386/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
6387   redundant Put removal, constant propagation, dead code removal,
6388   clean helper specialisation, and dead code removal (again).
6389*/
6390
6391
6392static
6393IRSB* cheap_transformations (
6394         IRSB* bb,
6395         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6396         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6397         VexRegisterUpdates pxControl
6398      )
6399{
6400   redundant_get_removal_BB ( bb );
6401   if (iropt_verbose) {
6402      vex_printf("\n========= REDUNDANT GET\n\n" );
6403      ppIRSB(bb);
6404   }
6405
6406   if (pxControl < VexRegUpdAllregsAtEachInsn) {
6407      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6408   }
6409   if (iropt_verbose) {
6410      vex_printf("\n========= REDUNDANT PUT\n\n" );
6411      ppIRSB(bb);
6412   }
6413
6414   bb = cprop_BB ( bb );
6415   if (iropt_verbose) {
6416      vex_printf("\n========= CPROPD\n\n" );
6417      ppIRSB(bb);
6418   }
6419
6420   do_deadcode_BB ( bb );
6421   if (iropt_verbose) {
6422      vex_printf("\n========= DEAD\n\n" );
6423      ppIRSB(bb);
6424   }
6425
6426   bb = spec_helpers_BB ( bb, specHelper );
6427   do_deadcode_BB ( bb );
6428   if (iropt_verbose) {
6429      vex_printf("\n========= SPECd \n\n" );
6430      ppIRSB(bb);
6431   }
6432
6433   return bb;
6434}
6435
6436
6437/* Do some more expensive transformations on bb, which are aimed at
6438   optimising as much as possible in the presence of GetI and PutI.  */
6439
6440static
6441IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
6442{
6443   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6444   collapse_AddSub_chains_BB( bb );
6445   do_redundant_GetI_elimination( bb );
6446   if (pxControl < VexRegUpdAllregsAtEachInsn) {
6447      do_redundant_PutI_elimination( bb, pxControl );
6448   }
6449   do_deadcode_BB( bb );
6450   return bb;
6451}
6452
6453
/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries

   As a side effect, vasserts that every expression position which is
   required to hold an atom really does, i.e. that |bb| is flat.
*/

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Classify the tmp's type: plain integer types don't
               count; FP, decimal-FP and vector types all set the
               hasVorFtemps flag. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            /* expdHi/dataHi are NULL for single-element CAS. */
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
6559
6560
/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/


/* Optimise |bb0| and return the (possibly new) optimised block.
   |specHelper| specialises clean helper calls; |preciseMemExnsFn| and
   |pxControl| steer the redundant-Put removal passes;
   |guest_addr| is the guest address of the block, used by the loop
   unroller; |guest_arch| enables the extra ARM (Thumb2) cleanup.
   The amount of work done is controlled by vex_control.iropt_level.
   Phase ordering below is deliberate: each pass assumes flat input
   and some passes create work for later ones. */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   /* Counters for DEBUG_IROPT stats: blocks seen, and blocks that
      took the expensive path. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      // (disabled: guarded by if (0))
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END   MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
6683
6684
6685/*---------------------------------------------------------------*/
6686/*--- end                                            ir_opt.c ---*/
6687/*---------------------------------------------------------------*/
6688