1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*---------------------------------------------------------------*/
4/*--- begin                                          ir_opt.c ---*/
5/*---------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2004-2012 OpenWorks LLP
12      info@open-works.net
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27   02110-1301, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30
31   Neither the names of the U.S. Department of Energy nor the
32   University of California nor the names of its contributors may be
33   used to endorse or promote products derived from this software
34   without prior written permission.
35*/
36
37#include "libvex_basictypes.h"
38#include "libvex_ir.h"
39#include "libvex.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "ir_opt.h"
44
45
46/* Set to 1 for lots of debugging output. */
47#define DEBUG_IROPT 0
48
49/* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
50#define STATS_IROPT 0
51
52
53/* What iropt does, 29 Dec 04.
54
55   It takes an IRSB and produces a new one with the same meaning,
56   defined thus:
57
58   After execution of the new BB, all guest state and guest memory is
59   the same as after execution of the original.  This is true
60   regardless of how the block was exited (at the end vs side exit).
61
62   In addition, parts of the guest state will be identical to that
63   created by execution of the original at the following observation
64   points:
65
66   * In a dirty helper call, any parts of the guest state that the
67     helper states that it reads or modifies will be up to date.
68     Also, guest memory will be up to date.  Parts of the guest state
69     not marked as being read or modified by the helper cannot be
70     assumed to be up-to-date at the point where the helper is called.
71
72   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
73     Immediately prior to any load or store, those parts of the guest
74     state marked as requiring precise exceptions will be up to date.
75     Also, guest memory will be up to date.  Parts of the guest state
76     not marked as requiring precise exceptions cannot be assumed to
77     be up-to-date at the point of the load/store.
78
79     If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
80     Same as minimal, but all the guest state is up to date at memory
81     exception points.
82
83     If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
84     Guest state is up to date at each instruction.
85
86   The relative order of loads and stores (including loads/stores of
87   guest memory done by dirty helpers annotated as such) is not
88   changed.  However, the relative order of loads with no intervening
89   stores/modifies may be changed.
90
91   Transformation order
92   ~~~~~~~~~~~~~~~~~~~~
93
94   There are three levels of optimisation, controlled by
95   vex_control.iropt_level.  Define first:
96
97   "Cheap transformations" are the following sequence:
98      * Redundant-Get removal
99      * Redundant-Put removal
100      * Constant propagation/folding
101      * Dead code removal
102      * Specialisation of clean helper functions
103      * Dead code removal
104
105   "Expensive transformations" are the following sequence:
106      * CSE
107      * Folding of add/sub chains
108      * Redundant-GetI removal
109      * Redundant-PutI removal
110      * Dead code removal
111
112   Then the transformations are as follows, as defined by
113   vex_control.iropt_level:
114
115   Level 0:
116      * Flatten into atomic form.
117
118   Level 1: the following sequence:
119      * Flatten into atomic form.
120      * Cheap transformations.
121
122   Level 2: the following sequence
123      * Flatten into atomic form.
124      * Cheap transformations.
125      * If block contains any floating or vector types, CSE.
126      * If block contains GetI or PutI, Expensive transformations.
127      * Try unrolling loops.  Three possible outcomes:
128        - No effect: do nothing more.
129        - Unrolled a loop, and block does not contain GetI or PutI:
130          Do: * CSE
131              * Dead code removal
132        - Unrolled a loop, and block contains GetI or PutI:
133          Do: * Expensive transformations
134              * Cheap transformations
135*/
136
137/* Implementation notes, 29 Dec 04.
138
139   TODO (important): I think rPutI removal ignores precise exceptions
140   and is therefore in a sense, wrong.  In the sense that PutIs are
141   assumed not to write parts of the guest state that we need to have
142   up-to-date at loads/stores.  So far on x86 guest that has not
143   mattered since indeed only the x87 FP registers and tags are
144   accessed using GetI/PutI, and there is no need so far for them to
145   be up to date at mem exception points.  The rPutI pass should be
146   fixed.
147
148   TODO: improve pessimistic handling of precise exceptions
149     in the tree builder.
150
151   TODO: check interaction of rGetI and dirty helpers.
152
153   F64i constants are treated differently from other constants.
154   They are not regarded as atoms, and instead lifted off and
155   bound to temps.  This allows them to participate in CSE, which
156   is important for getting good performance for x86 guest code.
157
158   CSE up F64 literals (already doing F64is)
159
160   CSE: consider carefully the requirement for precise exns
161        prior to making CSE any more aggressive.  */
162
163
164/*---------------------------------------------------------------*/
165/*--- Finite mappery, of a sort                               ---*/
166/*---------------------------------------------------------------*/
167
/* General map from HWord-sized thing to HWord-sized thing.  Could be
   done by hashing, but it's not clear whether or not this would really
   be any faster. */
171
typedef
   struct {
      Bool*  inuse;  /* inuse[i] == True iff slot i holds a live binding */
      HWord* key;    /* key[i] is valid only when inuse[i] */
      HWord* val;    /* val[i] is valid only when inuse[i] */
      Int    size;   /* allocated capacity of the three arrays */
      Int    used;   /* slots ever handed out; scan limit for lookups */
   }
   HashHW;
181
182static HashHW* newHHW ( void )
183{
184   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
185   h->size   = 8;
186   h->used   = 0;
187   h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
188   h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
189   h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
190   return h;
191}
192
193
194/* Look up key in the map. */
195
196static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
197{
198   Int i;
199   /* vex_printf("lookupHHW(%llx)\n", key ); */
200   for (i = 0; i < h->used; i++) {
201      if (h->inuse[i] && h->key[i] == key) {
202         if (val)
203            *val = h->val[i];
204         return True;
205      }
206   }
207   return False;
208}
209
210
211/* Add key->val to the map.  Replaces any existing binding for key. */
212
213static void addToHHW ( HashHW* h, HWord key, HWord val )
214{
215   Int i, j;
216   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
217
218   /* Find and replace existing binding, if any. */
219   for (i = 0; i < h->used; i++) {
220      if (h->inuse[i] && h->key[i] == key) {
221         h->val[i] = val;
222         return;
223      }
224   }
225
226   /* Ensure a space is available. */
227   if (h->used == h->size) {
228      /* Copy into arrays twice the size. */
229      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
230      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
231      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
232      for (i = j = 0; i < h->size; i++) {
233         if (!h->inuse[i]) continue;
234         inuse2[j] = True;
235         key2[j] = h->key[i];
236         val2[j] = h->val[i];
237         j++;
238      }
239      h->used = j;
240      h->size *= 2;
241      h->inuse = inuse2;
242      h->key = key2;
243      h->val = val2;
244   }
245
246   /* Finally, add it. */
247   vassert(h->used < h->size);
248   h->inuse[h->used] = True;
249   h->key[h->used] = key;
250   h->val[h->used] = val;
251   h->used++;
252}
253
254
255/*---------------------------------------------------------------*/
256/*--- Flattening out a BB into atomic SSA form                ---*/
257/*---------------------------------------------------------------*/
258
259/* Non-critical helper, heuristic for reducing the number of tmp-tmp
260   copies made by flattening.  If in doubt return False. */
261
262static Bool isFlat ( IRExpr* e )
263{
264   if (e->tag == Iex_Get)
265      return True;
266   if (e->tag == Iex_Binop)
267      return toBool( isIRAtom(e->Iex.Binop.arg1)
268                     && isIRAtom(e->Iex.Binop.arg2) );
269   if (e->tag == Iex_Load)
270      return isIRAtom(e->Iex.Load.addr);
271   return False;
272}
273
274/* Flatten out 'ex' so it is atomic, returning a new expression with
275   the same value, after having appended extra IRTemp assignments to
276   the end of 'bb'. */
277
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   /* For each compound expression kind: recursively flatten the
      operands first (appending any needed WrTmps to 'bb'), then bind
      the rebuilt expression to a fresh temp and return a RdTmp of it.
      Atoms (RdTmp, most Consts) are returned unchanged. */
   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* Get has no subexpressions; just lift it onto a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Flatten each argument in place in a shallow copy of the
            NULL-terminated args vector. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_Mux0X:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
                         flatten_Expr(bb, ex->Iex.Mux0X.expr0),
                         flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later.  (F64i is not regarded as an atom; see the
            implementation notes at the top of this file.) */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already atomic. */
         return ex;

      default:
         /* Unknown expression kind: dump it and give up. */
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
389
390
391/* Append a completely flattened form of 'st' to the end of 'bb'. */
392
/* Append a completely flattened form of 'st' to the end of 'bb':
   every expression operand is reduced to an atom via flatten_Expr,
   and a rebuilt statement (or 'st' itself, when already flat) is
   appended. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d,  *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Flatten both the index and the data expressions. */
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-width CAS; keep them
            NULL in the rebuilt statement. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata is NULL for a Load-Linked; preserve that. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Shallow-copy the dirty call so the original is left
            untouched, then flatten mAddr (if memory effects are
            declared), the guard, and each argument. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = flatten_Expr(bb, d2->args[i]);
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No expression operands; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         /* Unknown statement kind: dump it and give up. */
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
492
493
494static IRSB* flatten_BB ( IRSB* in )
495{
496   Int   i;
497   IRSB* out;
498   out = emptyIRSB();
499   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
500   for (i = 0; i < in->stmts_used; i++)
501      if (in->stmts[i])
502         flatten_Stmt( out, in->stmts[i] );
503   out->next     = flatten_Expr( out, in->next );
504   out->jumpkind = in->jumpkind;
505   out->offsIP   = in->offsIP;
506   return out;
507}
508
509
510/*---------------------------------------------------------------*/
511/*--- In-place removal of redundant GETs                      ---*/
512/*---------------------------------------------------------------*/
513
514/* Scan forwards, building up an environment binding (min offset, max
515   offset) pairs to values, which will either be temps or constants.
516
517   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
518   env and if it matches, replace the Get with the stored value.  If
519   there is no match, add a (minoff,maxoff) :-> t binding.
520
521   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
522   any binding which fully or partially overlaps with (minoff,maxoff).
523   Then add a new (minoff,maxoff) :-> t or c binding.  */
524
525/* Extract the min/max offsets from a guest state array descriptor. */
526
527inline
528static void getArrayBounds ( IRRegArray* descr,
529                             UInt* minoff, UInt* maxoff )
530{
531   *minoff = descr->base;
532   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
533   vassert((*minoff & ~0xFFFF) == 0);
534   vassert((*maxoff & ~0xFFFF) == 0);
535   vassert(*minoff <= *maxoff);
536}
537
538/* Create keys, of the form ((minoffset << 16) | maxoffset). */
539
540static UInt mk_key_GetPut ( Int offset, IRType ty )
541{
542   /* offset should fit in 16 bits. */
543   UInt minoff = offset;
544   UInt maxoff = minoff + sizeofIRType(ty) - 1;
545   vassert((minoff & ~0xFFFF) == 0);
546   vassert((maxoff & ~0xFFFF) == 0);
547   return (minoff << 16) | maxoff;
548}
549
550static UInt mk_key_GetIPutI ( IRRegArray* descr )
551{
552   UInt minoff, maxoff;
553   getArrayBounds( descr, &minoff, &maxoff );
554   vassert((minoff & ~0xFFFF) == 0);
555   vassert((maxoff & ~0xFFFF) == 0);
556   return (minoff << 16) | maxoff;
557}
558
559/* Supposing h has keys of the form generated by mk_key_GetPut and
560   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
561   .. k_hi).
562*/
563static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
564{
565   Int  j;
566   UInt e_lo, e_hi;
567   vassert(k_lo <= k_hi);
568   /* invalidate any env entries which in any way overlap (k_lo
569      .. k_hi) */
570   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
571
572   for (j = 0; j < h->used; j++) {
573      if (!h->inuse[j])
574         continue;
575      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
576      e_hi = ((UInt)h->key[j]) & 0xFFFF;
577      vassert(e_lo <= e_hi);
578      if (e_hi < k_lo || k_hi < e_lo)
579         continue; /* no overlap possible */
580      else
581         /* overlap; invalidate */
582         h->inuse[j] = False;
583   }
584}
585
586
/* In-place removal of redundant Gets.  Scan forwards through bb,
   maintaining an env mapping (minoff,maxoff) guest-state ranges to
   the atom (temp or constant) last known to hold that range's value.
   A Get whose range is in the env is replaced by the stored atom;
   Puts and state-writing dirty helpers invalidate (parts of) the
   env.  Assumes bb is flat. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;   /* NB: sole (unbraced) body of the 'if' above,
                                despite the misleading indentation */
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate.  'key' still holds
         the range computed for this Put in the invalidation step
         above. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
679
680
681/*---------------------------------------------------------------*/
682/*--- In-place removal of redundant PUTs                      ---*/
683/*---------------------------------------------------------------*/
684
685/* Find any Get uses in st and invalidate any partially or fully
686   overlapping ranges listed in env.  Due to the flattening phase, the
687   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
688
/* For the backwards redundant-Put pass: find any Get (or other
   guest-state/memory read) in 'st' and invalidate the corresponding
   entries of 'env' (the set of ranges whose next event is a write).
   Dirty calls, MBEs, AbiHints, CASs and LLSCs conservatively flush
   the whole env; memory accesses flush whatever preciseMemExnsFn
   (or the register-update policy) demands.  Due to the flattening
   phase, the only stmt kind we expect to find a Get on is
   IRStmt_WrTmp. */
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;  /* does 'st' touch guest memory? */
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            /* A read of this range: any pending write no longer
               makes the earlier Put redundant.  Drop overlapping
               entries. */
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;

      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         /* PutI reads its index expression, but both operands are
            atoms after flattening, so no Get can be hiding here. */
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         /* Unknown statement kind: dump it and give up. */
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (vex_control.iropt_register_updates) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi ))
                  env->inuse[j] = False;
            }
            break;
         default:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // Neither any rubbish other value.
            vassert(0);
      }
   } /* if (memRW) */

}
812
813
814/* Scan backwards, building up a set of (min offset, max
815   offset) pairs, indicating those parts of the guest state
816   for which the next event is a write.
817
818   On seeing a conditional exit, empty the set.
819
820   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
821   completely within the set, remove the Put.  Otherwise, add
822   (minoff,maxoff) to the set.
823
824   On seeing 'Get (minoff,maxoff)', remove any part of the set
825   overlapping (minoff,maxoff).  The same has to happen for any events
826   which implicitly read parts of the guest state: dirty helper calls
827   and loads/stores.
828*/
829
/* Remove redundant Put statements from 'bb': a Put is redundant if the
   guest state slice it writes is certain to be overwritten by a later
   Put before it can be observed.  Works by scanning the statements
   backwards, maintaining in 'env' the set of (offset,type) keys that
   are known to be written again later in the block.
   'preciseMemExnsFn' reports which guest state offsets must be kept
   up to date to maintain precise exceptions; it is consulted (via
   handle_gets_Stmt) for non-Put statements. */
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   /* Precondition on the register-update policy in force. */
   vassert
      (vex_control.iropt_register_updates == VexRegUpdUnwindregsAtMemAccess
       || vex_control.iropt_register_updates == VexRegUpdAllregsAtMemAccess);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* Note: PutI is deliberately excluded below -- it is neither
         removed nor recorded in env as a guaranteed write, and instead
         falls through to handle_gets_Stmt. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}
937
938
939/*---------------------------------------------------------------*/
940/*--- Constant propagation and folding                        ---*/
941/*---------------------------------------------------------------*/
942
#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation.
   Reset at the start of each sameIRExprs call; incremented once per
   node visited in sameIRExprs_aux2. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success.  (Giving up simply yields a conservative
   "not the same" answer, which is always safe.) */
#define NODE_LIMIT 30
970
971
972/* The env in this section is a map from IRTemp to IRExpr*,
973   that is, an array indexed by IRTemp. */
974
975/* Do both expressions compute the same value? The answer is generally
976   conservative, i.e. it will report that the expressions do not compute
977   the same value when in fact they do. The reason is that we do not
978   keep track of changes in the guest state and memory. Thusly, two
979   Get's, GetI's or Load's, even when accessing the same location, will be
980   assumed to compute different values. After all the accesses may happen
981   at different times and the guest state / memory can have changed in
982   the meantime.
983
984   XXX IMPORTANT XXX the two expressions must have the same IR type.
985   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
986
987/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
988   wrapper that deals with the common tags-don't-match case, and a
989   slower out of line general case.  Saves a few insns. */
990
991__attribute__((noinline))
992static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
993
994inline
995static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
996{
997   if (e1->tag != e2->tag) return False;
998   return sameIRExprs_aux2(env, e1, e2);
999}
1000
/* Out-of-line general case of the expression comparison.  On entry the
   wrapper (sameIRExprs_aux) has already established e1->tag == e2->tag.
   Returns True only if e1 and e2 provably compute the same value;
   False is always a safe (conservative) answer. */
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Give up once the comparison has visited too many nodes; see
      NODE_LIMIT. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         /* The very same temp trivially holds the same value. */
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Different temps: if both have known bindings in env,
            compare the expressions they were assigned from. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         /* Same operator and pairwise-same arguments. */
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller contract: both expressions must have the same IR
            type, hence the constant kinds must match. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break; /* other constant kinds: conservatively unequal */
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_Mux0X:
         return toBool(    sameIRExprs_aux( env, e1->Iex.Mux0X.cond,
                                                 e2->Iex.Mux0X.cond )
                        && sameIRExprs_aux( env, e1->Iex.Mux0X.expr0,
                                                 e2->Iex.Mux0X.expr0 )
                        && sameIRExprs_aux( env, e1->Iex.Mux0X.exprX,
                                                 e2->Iex.Mux0X.exprX ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
1078
/* Top-level entry point: do e1 and e2 compute the same value?
   Conservative -- may answer False for equal expressions, but never
   True for unequal ones.  Both expressions must have the same IR type
   (see the XXX IMPORTANT note above). */
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   /* Reset the per-invocation work bound before descending. */
   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False;  /* reset */
#endif /* STATS_IROPT */

   return same;
}
1101
1102
1103/* Debugging-only hack (not used in production runs): make a guess
1104   whether sameIRExprs might assert due to the two args being of
1105   different types.  If in doubt return False.  Is only used when
1106   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1107   Bad because it duplicates functionality from typeOfIRExpr.  See
1108   comment on the single use point below for rationale. */
1109static
1110Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
1111{
1112   if (e1->tag != e2->tag) return False;
1113   switch (e1->tag) {
1114      case Iex_Const: {
1115         /* The only interesting case */
1116         IRConst *c1 = e1->Iex.Const.con;
1117         IRConst *c2 = e2->Iex.Const.con;
1118         return c1->tag != c2->tag;
1119      }
1120      default:
1121         break;
1122   }
1123   return False;
1124}
1125
1126
1127/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
1128static Bool isZeroU32 ( IRExpr* e )
1129{
1130   return toBool( e->tag == Iex_Const
1131                  && e->Iex.Const.con->tag == Ico_U32
1132                  && e->Iex.Const.con->Ico.U32 == 0);
1133}
1134
1135/* Is this literally IRExpr_Const(IRConst_U32(1---1)) ? */
1136static Bool isOnesU32 ( IRExpr* e )
1137{
1138   return toBool( e->tag == Iex_Const
1139                  && e->Iex.Const.con->tag == Ico_U32
1140                  && e->Iex.Const.con->Ico.U32 == 0xFFFFFFFF );
1141}
1142
1143/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
1144static Bool isZeroU64 ( IRExpr* e )
1145{
1146   return toBool( e->tag == Iex_Const
1147                  && e->Iex.Const.con->tag == Ico_U64
1148                  && e->Iex.Const.con->Ico.U64 == 0);
1149}
1150
1151/* Is this an integer constant with value 0 ? */
1152static Bool isZeroU ( IRExpr* e )
1153{
1154   if (e->tag != Iex_Const) return False;
1155   switch (e->Iex.Const.con->tag) {
1156      case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
1157      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
1158      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
1159      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
1160      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
1161      default: vpanic("isZeroU");
1162   }
1163}
1164
1165/* Is this an integer constant with value 1---1b ? */
1166static Bool isOnesU ( IRExpr* e )
1167{
1168   if (e->tag != Iex_Const) return False;
1169   switch (e->Iex.Const.con->tag) {
1170      case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
1171      case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
1172      case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
1173                                     == 0xFFFFFFFF);
1174      case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
1175                                     == 0xFFFFFFFFFFFFFFFFULL);
1176      default: ppIRExpr(e); vpanic("isOnesU");
1177   }
1178}
1179
1180static Bool notBool ( Bool b )
1181{
1182   if (b == True) return False;
1183   if (b == False) return True;
1184   vpanic("notBool");
1185}
1186
1187/* Make a zero which has the same type as the result of the given
1188   primop. */
1189static IRExpr* mkZeroOfPrimopResultType ( IROp op )
1190{
1191   switch (op) {
1192      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
1193      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
1194      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
1195      case Iop_Sub32:
1196      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
1197      case Iop_Sub64:
1198      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
1199      case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
1200      default: vpanic("mkZeroOfPrimopResultType: bad primop");
1201   }
1202}
1203
1204/* Make a value containing all 1-bits, which has the same type as the
1205   result of the given primop. */
1206static IRExpr* mkOnesOfPrimopResultType ( IROp op )
1207{
1208   switch (op) {
1209      case Iop_CmpEQ32:
1210      case Iop_CmpEQ64:
1211         return IRExpr_Const(IRConst_U1(toBool(1)));
1212      case Iop_Or8:
1213         return IRExpr_Const(IRConst_U8(0xFF));
1214      case Iop_Or16:
1215         return IRExpr_Const(IRConst_U16(0xFFFF));
1216      case Iop_Or32:
1217         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
1218      case Iop_CmpEQ8x8:
1219      case Iop_Or64:
1220         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
1221      case Iop_CmpEQ8x16:
1222      case Iop_CmpEQ16x8:
1223      case Iop_CmpEQ32x4:
1224         return IRExpr_Const(IRConst_V128(0xFFFF));
1225      default:
1226         ppIROp(op);
1227         vpanic("mkOnesOfPrimopResultType: bad primop");
1228   }
1229}
1230
1231/* Helpers for folding Clz32/64. */
1232static UInt fold_Clz64 ( ULong value )
1233{
1234   UInt i;
1235   vassert(value != 0ULL); /* no defined semantics for arg==0 */
1236   for (i = 0; i < 64; ++i) {
1237      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
1238   }
1239   vassert(0);
1240   /*NOTREACHED*/
1241   return 0;
1242}
1243
1244static UInt fold_Clz32 ( UInt value )
1245{
1246   UInt i;
1247   vassert(value != 0); /* no defined semantics for arg==0 */
1248   for (i = 0; i < 32; ++i) {
1249      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
1250   }
1251   vassert(0);
1252   /*NOTREACHED*/
1253   return 0;
1254}
1255
1256/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
1257   the corresponding real constant. */
1258//XXX re-check this before use
1259//static ULong de_summarise_V64 ( UChar v64 )
1260//{
1261//   ULong r = 0;
1262//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1263//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1264//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1265//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1266//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1267//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1268//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1269//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1270//   return r;
1271//}
1272
1273static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
1274{
1275   Int     shift;
1276   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1277
1278   switch (e->tag) {
1279   case Iex_Unop:
1280      /* UNARY ops */
1281      if (e->Iex.Unop.arg->tag == Iex_Const) {
1282         switch (e->Iex.Unop.op) {
1283         case Iop_1Uto8:
1284            e2 = IRExpr_Const(IRConst_U8(toUChar(
1285                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1286                    ? 1 : 0)));
1287            break;
1288         case Iop_1Uto32:
1289            e2 = IRExpr_Const(IRConst_U32(
1290                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1291                    ? 1 : 0));
1292            break;
1293         case Iop_1Uto64:
1294            e2 = IRExpr_Const(IRConst_U64(
1295                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1296                    ? 1 : 0));
1297            break;
1298
1299         case Iop_1Sto8:
1300            e2 = IRExpr_Const(IRConst_U8(toUChar(
1301                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1302                    ? 0xFF : 0)));
1303            break;
1304         case Iop_1Sto16:
1305            e2 = IRExpr_Const(IRConst_U16(toUShort(
1306                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1307                    ? 0xFFFF : 0)));
1308            break;
1309         case Iop_1Sto32:
1310            e2 = IRExpr_Const(IRConst_U32(
1311                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1312                    ? 0xFFFFFFFF : 0));
1313            break;
1314         case Iop_1Sto64:
1315            e2 = IRExpr_Const(IRConst_U64(
1316                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1317                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
1318            break;
1319
1320         case Iop_8Sto32: {
1321            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1322            s32 <<= 24;
1323            s32 >>= 24;
1324            e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1325            break;
1326         }
1327         case Iop_16Sto32: {
1328            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1329            s32 <<= 16;
1330            s32 >>= 16;
1331            e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
1332            break;
1333         }
1334         case Iop_8Uto64:
1335            e2 = IRExpr_Const(IRConst_U64(
1336                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1337            break;
1338         case Iop_16Uto64:
1339            e2 = IRExpr_Const(IRConst_U64(
1340                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1341            break;
1342         case Iop_8Uto32:
1343            e2 = IRExpr_Const(IRConst_U32(
1344                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1345            break;
1346         case Iop_8Sto16: {
1347            /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1348            s16 <<= 8;
1349            s16 >>= 8;
1350            e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
1351            break;
1352         }
1353         case Iop_8Uto16:
1354            e2 = IRExpr_Const(IRConst_U16(
1355                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1356            break;
1357         case Iop_16Uto32:
1358            e2 = IRExpr_Const(IRConst_U32(
1359                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1360            break;
1361         case Iop_32to16:
1362            e2 = IRExpr_Const(IRConst_U16(toUShort(
1363                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1364            break;
1365         case Iop_32to8:
1366            e2 = IRExpr_Const(IRConst_U8(toUChar(
1367                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1368            break;
1369         case Iop_32to1:
1370            e2 = IRExpr_Const(IRConst_U1(toBool(
1371                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1372                 )));
1373            break;
1374         case Iop_64to1:
1375            e2 = IRExpr_Const(IRConst_U1(toBool(
1376                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1377                 )));
1378            break;
1379
1380         case Iop_NotV128:
1381            e2 = IRExpr_Const(IRConst_V128(
1382                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
1383            break;
1384         case Iop_Not64:
1385            e2 = IRExpr_Const(IRConst_U64(
1386                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1387            break;
1388         case Iop_Not32:
1389            e2 = IRExpr_Const(IRConst_U32(
1390                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1391            break;
1392         case Iop_Not16:
1393            e2 = IRExpr_Const(IRConst_U16(toUShort(
1394                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1395            break;
1396         case Iop_Not8:
1397            e2 = IRExpr_Const(IRConst_U8(toUChar(
1398                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1399            break;
1400
1401         case Iop_Not1:
1402            e2 = IRExpr_Const(IRConst_U1(
1403                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1404            break;
1405
1406         case Iop_64to8: {
1407            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1408            w64 &= 0xFFULL;
1409            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1410            break;
1411         }
1412         case Iop_64to16: {
1413            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1414            w64 &= 0xFFFFULL;
1415            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1416            break;
1417         }
1418         case Iop_64to32: {
1419            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1420            w64 &= 0x00000000FFFFFFFFULL;
1421            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1422            break;
1423         }
1424         case Iop_64HIto32: {
1425            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1426            w64 >>= 32;
1427            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1428            break;
1429         }
1430         case Iop_32Uto64:
1431            e2 = IRExpr_Const(IRConst_U64(
1432                    0xFFFFFFFFULL
1433                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1434            break;
1435         case Iop_16Sto64: {
1436            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1437            s64 <<= 48;
1438            s64 >>= 48;
1439            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1440            break;
1441         }
1442         case Iop_32Sto64: {
1443            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1444            s64 <<= 32;
1445            s64 >>= 32;
1446            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1447            break;
1448         }
1449
1450         case Iop_16to8: {
1451            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1452            w16 &= 0xFF;
1453            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1454            break;
1455         }
1456         case Iop_16HIto8: {
1457            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1458            w16 >>= 8;
1459            w16 &= 0xFF;
1460            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1461            break;
1462         }
1463
1464         case Iop_CmpNEZ8:
1465            e2 = IRExpr_Const(IRConst_U1(toBool(
1466                    0 !=
1467                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1468                 )));
1469            break;
1470         case Iop_CmpNEZ32:
1471            e2 = IRExpr_Const(IRConst_U1(toBool(
1472                    0 !=
1473                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1474                 )));
1475            break;
1476         case Iop_CmpNEZ64:
1477            e2 = IRExpr_Const(IRConst_U1(toBool(
1478                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1479                 )));
1480            break;
1481
1482         case Iop_CmpwNEZ32: {
1483            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1484            if (w32 == 0)
1485               e2 = IRExpr_Const(IRConst_U32( 0 ));
1486            else
1487               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1488            break;
1489         }
1490         case Iop_CmpwNEZ64: {
1491            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1492            if (w64 == 0)
1493               e2 = IRExpr_Const(IRConst_U64( 0 ));
1494            else
1495               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1496            break;
1497         }
1498
1499         case Iop_Left32: {
1500            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1501            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
1502            s32 = (s32 | (-s32));
1503            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1504            break;
1505         }
1506
1507         case Iop_Left64: {
1508            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1509            Long  s64 = (Long)u64;
1510            s64 = (s64 | (-s64));
1511            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1512            break;
1513         }
1514
1515         case Iop_Clz32: {
1516            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1517            if (u32 != 0)
1518               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1519            break;
1520         }
1521         case Iop_Clz64: {
1522            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1523            if (u64 != 0ULL)
1524               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1525            break;
1526         }
1527
1528         /* For these vector ones, can't fold all cases, but at least
1529            do the most obvious one.  Could do better here using
1530            summarise/desummarise of vector constants, but too
1531            difficult to verify; hence just handle the zero cases. */
1532         case Iop_32UtoV128: {
1533            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1534            if (0 == u32) {
1535               e2 = IRExpr_Const(IRConst_V128(0x0000));
1536            } else {
1537               goto unhandled;
1538            }
1539            break;
1540         }
1541         case Iop_V128to64: {
1542            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1543            if (0 == ((v128 >> 0) & 0xFF)) {
1544               e2 = IRExpr_Const(IRConst_U64(0));
1545            } else {
1546               goto unhandled;
1547            }
1548            break;
1549         }
1550         case Iop_V128HIto64: {
1551            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1552            if (0 == ((v128 >> 8) & 0xFF)) {
1553               e2 = IRExpr_Const(IRConst_U64(0));
1554            } else {
1555               goto unhandled;
1556            }
1557            break;
1558         }
1559         case Iop_64UtoV128: {
1560            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1561            if (0 == u64) {
1562               e2 = IRExpr_Const(IRConst_V128(0x0000));
1563            } else {
1564               goto unhandled;
1565            }
1566            break;
1567         }
1568
1569         /* Even stupider (although still correct ..) */
1570         case Iop_V256to64_0: case Iop_V256to64_1:
1571         case Iop_V256to64_2: case Iop_V256to64_3: {
1572            UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1573            if (v256 == 0x00000000) {
1574               e2 = IRExpr_Const(IRConst_U64(0));
1575            } else {
1576               goto unhandled;
1577            }
1578            break;
1579         }
1580
1581         default:
1582            goto unhandled;
1583      }
1584      }
1585      break;
1586
1587   case Iex_Binop:
1588      /* BINARY ops */
1589      if (e->Iex.Binop.arg1->tag == Iex_Const
1590          && e->Iex.Binop.arg2->tag == Iex_Const) {
1591         /* cases where both args are consts */
1592         switch (e->Iex.Binop.op) {
1593
1594            /* -- Or -- */
1595            case Iop_Or8:
1596               e2 = IRExpr_Const(IRConst_U8(toUChar(
1597                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1598                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1599               break;
1600            case Iop_Or16:
1601               e2 = IRExpr_Const(IRConst_U16(toUShort(
1602                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1603                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1604               break;
1605            case Iop_Or32:
1606               e2 = IRExpr_Const(IRConst_U32(
1607                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1608                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1609               break;
1610            case Iop_Or64:
1611               e2 = IRExpr_Const(IRConst_U64(
1612                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1613                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1614               break;
1615            case Iop_OrV128:
1616               e2 = IRExpr_Const(IRConst_V128(
1617                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1618                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1619               break;
1620
1621            /* -- Xor -- */
1622            case Iop_Xor8:
1623               e2 = IRExpr_Const(IRConst_U8(toUChar(
1624                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1625                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1626               break;
1627            case Iop_Xor16:
1628               e2 = IRExpr_Const(IRConst_U16(toUShort(
1629                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1630                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1631               break;
1632            case Iop_Xor32:
1633               e2 = IRExpr_Const(IRConst_U32(
1634                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1635                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1636               break;
1637            case Iop_Xor64:
1638               e2 = IRExpr_Const(IRConst_U64(
1639                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1640                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1641               break;
1642            case Iop_XorV128:
1643               e2 = IRExpr_Const(IRConst_V128(
1644                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1645                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1646               break;
1647
1648            /* -- And -- */
1649            case Iop_And8:
1650               e2 = IRExpr_Const(IRConst_U8(toUChar(
1651                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1652                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1653               break;
1654            case Iop_And16:
1655               e2 = IRExpr_Const(IRConst_U16(toUShort(
1656                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1657                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1658               break;
1659            case Iop_And32:
1660               e2 = IRExpr_Const(IRConst_U32(
1661                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1662                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1663               break;
1664            case Iop_And64:
1665               e2 = IRExpr_Const(IRConst_U64(
1666                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1667                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1668               break;
1669            case Iop_AndV128:
1670               e2 = IRExpr_Const(IRConst_V128(
1671                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1672                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1673               break;
1674
1675            /* -- Add -- */
1676            case Iop_Add8:
1677               e2 = IRExpr_Const(IRConst_U8(toUChar(
1678                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1679                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1680               break;
1681            case Iop_Add32:
1682               e2 = IRExpr_Const(IRConst_U32(
1683                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1684                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1685               break;
1686            case Iop_Add64:
1687               e2 = IRExpr_Const(IRConst_U64(
1688                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1689                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1690               break;
1691
1692            /* -- Sub -- */
1693            case Iop_Sub8:
1694               e2 = IRExpr_Const(IRConst_U8(toUChar(
1695                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1696                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1697               break;
1698            case Iop_Sub32:
1699               e2 = IRExpr_Const(IRConst_U32(
1700                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1701                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1702               break;
1703            case Iop_Sub64:
1704               e2 = IRExpr_Const(IRConst_U64(
1705                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1706                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1707               break;
1708
1709            /* -- Max32U -- */
1710            case Iop_Max32U: {
1711               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1712               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1713               UInt res  = u32a > u32b ? u32a : u32b;
1714               e2 = IRExpr_Const(IRConst_U32(res));
1715               break;
1716            }
1717
1718            /* -- Mul -- */
1719            case Iop_Mul32:
1720               e2 = IRExpr_Const(IRConst_U32(
1721                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1722                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1723               break;
1724            case Iop_Mul64:
1725               e2 = IRExpr_Const(IRConst_U64(
1726                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1727                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1728               break;
1729
1730            case Iop_MullS32: {
1731               /* very paranoid */
1732               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1733               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1734               Int   s32a = (Int)u32a;
1735               Int   s32b = (Int)u32b;
1736               Long  s64a = (Long)s32a;
1737               Long  s64b = (Long)s32b;
1738               Long  sres = s64a * s64b;
1739               ULong ures = (ULong)sres;
1740               e2 = IRExpr_Const(IRConst_U64(ures));
1741               break;
1742            }
1743
1744            /* -- Shl -- */
1745            case Iop_Shl32:
1746               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1747               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1748               if (shift >= 0 && shift <= 31)
1749                  e2 = IRExpr_Const(IRConst_U32(
1750                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1751                           << shift)));
1752               break;
1753            case Iop_Shl64:
1754               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1755               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1756               if (shift >= 0 && shift <= 63)
1757                  e2 = IRExpr_Const(IRConst_U64(
1758                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1759                           << shift)));
1760               break;
1761
1762            /* -- Sar -- */
1763            case Iop_Sar32: {
1764               /* paranoid ... */
1765               /*signed*/ Int s32;
1766               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1767               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1768               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1769               if (shift >= 0 && shift <= 31) {
1770                  s32 >>=/*signed*/ shift;
1771                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1772               }
1773               break;
1774            }
1775            case Iop_Sar64: {
1776               /* paranoid ... */
1777               /*signed*/ Long s64;
1778               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1779               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1780               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1781               if (shift >= 0 && shift <= 63) {
1782                  s64 >>=/*signed*/ shift;
1783                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1784               }
1785               break;
1786            }
1787
1788            /* -- Shr -- */
1789            case Iop_Shr32: {
1790               /* paranoid ... */
1791               /*unsigned*/ UInt u32;
1792               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1793               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1794               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1795               if (shift >= 0 && shift <= 31) {
1796                  u32 >>=/*unsigned*/ shift;
1797                  e2 = IRExpr_Const(IRConst_U32(u32));
1798               }
1799               break;
1800            }
1801            case Iop_Shr64: {
1802               /* paranoid ... */
1803               /*unsigned*/ ULong u64;
1804               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1805               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1806               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1807               if (shift >= 0 && shift <= 63) {
1808                  u64 >>=/*unsigned*/ shift;
1809                  e2 = IRExpr_Const(IRConst_U64(u64));
1810               }
1811               break;
1812            }
1813
1814            /* -- CmpEQ -- */
1815            case Iop_CmpEQ32:
1816               e2 = IRExpr_Const(IRConst_U1(toBool(
1817                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1818                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1819               break;
1820            case Iop_CmpEQ64:
1821               e2 = IRExpr_Const(IRConst_U1(toBool(
1822                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1823                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1824               break;
1825
1826            /* -- CmpNE -- */
1827            case Iop_CmpNE8:
1828               e2 = IRExpr_Const(IRConst_U1(toBool(
1829                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1830                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1831               break;
1832            case Iop_CmpNE32:
1833               e2 = IRExpr_Const(IRConst_U1(toBool(
1834                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1835                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1836               break;
1837            case Iop_CmpNE64:
1838               e2 = IRExpr_Const(IRConst_U1(toBool(
1839                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1840                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1841               break;
1842
1843            /* -- CmpLEU -- */
1844            case Iop_CmpLE32U:
1845               e2 = IRExpr_Const(IRConst_U1(toBool(
1846                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1847                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1848               break;
1849            case Iop_CmpLE64U:
1850               e2 = IRExpr_Const(IRConst_U1(toBool(
1851                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1852                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1853               break;
1854
1855            /* -- CmpLES -- */
1856            case Iop_CmpLE32S:
1857               e2 = IRExpr_Const(IRConst_U1(toBool(
1858                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1859                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1860               break;
1861            case Iop_CmpLE64S:
1862               e2 = IRExpr_Const(IRConst_U1(toBool(
1863                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1864                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1865               break;
1866
1867            /* -- CmpLTS -- */
1868            case Iop_CmpLT32S:
1869               e2 = IRExpr_Const(IRConst_U1(toBool(
1870                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1871                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1872               break;
1873            case Iop_CmpLT64S:
1874               e2 = IRExpr_Const(IRConst_U1(toBool(
1875                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1876                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1877               break;
1878
1879            /* -- CmpLTU -- */
1880            case Iop_CmpLT32U:
1881               e2 = IRExpr_Const(IRConst_U1(toBool(
1882                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1883                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1884               break;
1885            case Iop_CmpLT64U:
1886               e2 = IRExpr_Const(IRConst_U1(toBool(
1887                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1888                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1889               break;
1890
1891            /* -- CmpORD -- */
1892            case Iop_CmpORD32S: {
1893               /* very paranoid */
1894               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1895               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1896               Int   s32a = (Int)u32a;
1897               Int   s32b = (Int)u32b;
1898               Int   r = 0x2; /* EQ */
1899               if (s32a < s32b) {
1900                  r = 0x8; /* LT */
1901               }
1902               else if (s32a > s32b) {
1903                  r = 0x4; /* GT */
1904               }
1905               e2 = IRExpr_Const(IRConst_U32(r));
1906               break;
1907            }
1908
1909            /* -- nHLto2n -- */
1910            case Iop_32HLto64:
1911               e2 = IRExpr_Const(IRConst_U64(
1912                       (((ULong)(e->Iex.Binop.arg1
1913                                  ->Iex.Const.con->Ico.U32)) << 32)
1914                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
1915                    ));
1916               break;
1917            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
1922               break;
1923            /* For this vector one, can't fold all cases, but at
1924               least do the most obvious one.  Could do better here
1925               using summarise/desummarise of vector constants, but
1926               too difficult to verify; hence just handle the zero
1927               cases. */
1928            case Iop_64HLtoV128: {
1929               ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
1930               ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
1931               if (0 == argHi && 0 == argLo) {
1932                  e2 = IRExpr_Const(IRConst_V128(0));
1933               } else {
1934                  goto unhandled;
1935               }
1936               break;
1937            }
1938
1939            /* -- V128 stuff -- */
1940            case Iop_InterleaveLO8x16: {
1941               /* This turns up a lot in Memcheck instrumentation of
1942                  Icc generated code.  I don't know why. */
1943               UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
1944               UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
1945               if (0 == arg1 && 0 == arg2) {
1946                  e2 = IRExpr_Const(IRConst_V128(0));
1947               } else {
1948                  goto unhandled;
1949               }
1950               break;
1951            }
1952
1953            default:
1954               goto unhandled;
1955         }
1956
1957      } else {
1958
1959         /* other cases (identities, etc) */
1960         switch (e->Iex.Binop.op) {
1961
1962            case Iop_Shl32:
1963            case Iop_Shl64:
1964            case Iop_Shr64:
1965               /* Shl32/Shl64/Shr64(x,0) ==> x */
1966               if (isZeroU(e->Iex.Binop.arg2)) {
1967                  e2 = e->Iex.Binop.arg1;
1968                  break;
1969               }
1970               /* Shl32/Shl64/Shr64(0,x) ==> 0 */
1971               if (isZeroU(e->Iex.Binop.arg1)) {
1972                  e2 = e->Iex.Binop.arg1;
1973                  break;
1974               }
1975               break;
1976
1977            case Iop_Shr32:
1978               /* Shr32(x,0) ==> x */
1979               if (isZeroU(e->Iex.Binop.arg2)) {
1980                  e2 = e->Iex.Binop.arg1;
1981                  break;
1982               }
1983               break;
1984
1985            case Iop_Or8:
1986            case Iop_Or16:
1987            case Iop_Or32:
1988            case Iop_Or64:
1989            case Iop_Max32U:
1990               /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
1991               if (isZeroU(e->Iex.Binop.arg2)) {
1992                  e2 = e->Iex.Binop.arg1;
1993                  break;
1994               }
1995               /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
1996               if (isZeroU(e->Iex.Binop.arg1)) {
1997                  e2 = e->Iex.Binop.arg2;
1998                  break;
1999               }
2000               /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2001               /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2002               if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2003                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2004                  break;
2005               }
2006               /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2007               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2008                  e2 = e->Iex.Binop.arg1;
2009                  break;
2010               }
2011               break;
2012
2013            case Iop_Add8:
2014               /* Add8(t,t) ==> t << 1.
2015                  Memcheck doesn't understand that
2016                  x+x produces a defined least significant bit, and it seems
2017                  simplest just to get rid of the problem by rewriting it
2018                  out, since the opportunity to do so exists. */
2019               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2020                  e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2021                                    IRExpr_Const(IRConst_U8(1)));
2022                  break;
2023               }
2024               break;
2025
2026               /* NB no Add16(t,t) case yet as no known test case exists */
2027
2028            case Iop_Add32:
2029            case Iop_Add64:
2030               /* Add32/Add64(x,0) ==> x */
2031               if (isZeroU(e->Iex.Binop.arg2)) {
2032                  e2 = e->Iex.Binop.arg1;
2033                  break;
2034               }
2035               /* Add32/Add64(0,x) ==> x */
2036               if (isZeroU(e->Iex.Binop.arg1)) {
2037                  e2 = e->Iex.Binop.arg2;
2038                  break;
2039               }
2040               /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2041               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2042                  e2 = IRExpr_Binop(
2043                          e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2044                          e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2045                  break;
2046               }
2047               break;
2048
2049            case Iop_Sub64:
2050               /* Sub64(x,0) ==> x */
2051               if (isZeroU64(e->Iex.Binop.arg2)) {
2052                  e2 = e->Iex.Binop.arg1;
2053                  break;
2054               }
2055               /* Sub64(t,t) ==> 0, for some IRTemp t */
2056               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2057                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2058                  break;
2059               }
2060               break;
2061
2062            case Iop_And32:
2063               /* And32(x,0xFFFFFFFF) ==> x */
2064               if (isOnesU32(e->Iex.Binop.arg2)) {
2065                  e2 = e->Iex.Binop.arg1;
2066                  break;
2067               }
2068               /* And32(x,0) ==> 0 */
2069               if (isZeroU32(e->Iex.Binop.arg2)) {
2070                  e2 = e->Iex.Binop.arg2;
2071                  break;
2072               }
2073               /* And32(0,x) ==> 0 */
2074               if (isZeroU32(e->Iex.Binop.arg1)) {
2075                  e2 = e->Iex.Binop.arg1;
2076                  break;
2077               }
2078               /* And32(t,t) ==> t, for some IRTemp t */
2079               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2080                  e2 = e->Iex.Binop.arg1;
2081                  break;
2082               }
2083               break;
2084
2085            case Iop_And8:
2086            case Iop_And16:
2087            case Iop_And64:
2088            case Iop_AndV128:
2089            case Iop_AndV256:
2090               /* And8/And16/And64/AndV128/AndV256(t,t)
2091                  ==> t, for some IRTemp t */
2092               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2093                  e2 = e->Iex.Binop.arg1;
2094                  break;
2095               }
2096               break;
2097
2098            case Iop_OrV128:
2099            case Iop_OrV256:
               /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2101               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2102                  e2 = e->Iex.Binop.arg1;
2103                  break;
2104               }
2105               break;
2106
2107            case Iop_Xor8:
2108            case Iop_Xor16:
2109            case Iop_Xor32:
2110            case Iop_Xor64:
2111            case Iop_XorV128:
2112               /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2113               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2114                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2115                  break;
2116               }
2117               break;
2118
2119            case Iop_Sub32:
2120            case Iop_CmpNE32:
2121               /* Sub32/CmpNE32(t,t) ==> 0, for some IRTemp t */
2122               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2123                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2124                  break;
2125               }
2126               break;
2127
2128            case Iop_CmpEQ32:
2129            case Iop_CmpEQ64:
2130            case Iop_CmpEQ8x8:
2131            case Iop_CmpEQ8x16:
2132            case Iop_CmpEQ16x8:
2133            case Iop_CmpEQ32x4:
2134               if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2135                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2136                  break;
2137               }
2138               break;
2139
2140            default:
2141               break;
2142         }
2143      }
2144      break;
2145
2146   case Iex_Mux0X:
2147      /* Mux0X */
2148
      /* is the discriminant a constant? */
2150      if (e->Iex.Mux0X.cond->tag == Iex_Const) {
2151         Bool zero;
2152         /* assured us by the IR type rules */
2153         vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
2154         zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
2155                                     ->Iex.Const.con->Ico.U8));
2156         e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
2157      }
2158      else
2159      /* are the arms identical? (pretty weedy test) */
2160      if (sameIRExprs(env, e->Iex.Mux0X.expr0,
2161                      e->Iex.Mux0X.exprX)) {
2162         e2 = e->Iex.Mux0X.expr0;
2163      }
2164      break;
2165
2166   default:
2167      /* not considered */
2168      break;
2169   }
2170
2171   /* Show cases where we've found but not folded 'op(t,t)'.  Be
2172      careful not to call sameIRExprs with values of different types,
2173      though, else it will assert (and so it should!).  We can't
2174      conveniently call typeOfIRExpr on the two args without a whole
2175      bunch of extra plumbing to pass in a type env, so just use a
2176      hacky test to check the arguments are not anything that might
2177      sameIRExprs to assert.  This is only OK because this kludge is
2178      only used for debug printing, not for "real" operation.  For
2179      "real" operation (ie, all other calls to sameIRExprs), it is
      essential that the two args have the same type.
2181
2182      The "right" solution is to plumb the containing block's
2183      IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
2184      that's a bunch of extra parameter passing which will just slow
2185      down the normal case, for no purpose. */
2186   if (vex_control.iropt_verbosity > 0
2187       && e == e2
2188       && e->tag == Iex_Binop
2189       && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2190                                                    e->Iex.Binop.arg2)
2191       && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2192      vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2193      ppIRExpr(e);
2194      vex_printf("\n");
2195   }
2196
2197   /* Show the overall results of folding. */
2198   if (DEBUG_IROPT && e2 != e) {
2199      vex_printf("FOLD: ");
2200      ppIRExpr(e); vex_printf("  ->  ");
2201      ppIRExpr(e2); vex_printf("\n");
2202   }
2203
2204   return e2;
2205
2206 unhandled:
2207#  if 0
2208   vex_printf("\n\n");
2209   ppIRExpr(e);
2210   vpanic("fold_Expr: no rule for the above");
2211#  else
2212   if (vex_control.iropt_verbosity > 0) {
2213      vex_printf("vex iropt: fold_Expr: no const rule for: ");
2214      ppIRExpr(e);
2215      vex_printf("\n");
2216   }
2217   return e2;
2218#  endif
2219}
2220
2221
2222/* Apply the subst to a simple 1-level expression -- guaranteed to be
2223   1-level due to previous flattening pass. */
2224
2225static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2226{
2227   switch (ex->tag) {
2228      case Iex_RdTmp:
2229         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
2230            IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
2231            if (rhs->tag == Iex_RdTmp)
2232               return rhs;
2233            if (rhs->tag == Iex_Const
2234                && rhs->Iex.Const.con->tag != Ico_F64i)
2235               return rhs;
2236         }
2237         /* not bound in env */
2238         return ex;
2239
2240      case Iex_Const:
2241      case Iex_Get:
2242         return ex;
2243
2244      case Iex_GetI:
2245         vassert(isIRAtom(ex->Iex.GetI.ix));
2246         return IRExpr_GetI(
2247            ex->Iex.GetI.descr,
2248            subst_Expr(env, ex->Iex.GetI.ix),
2249            ex->Iex.GetI.bias
2250         );
2251
2252      case Iex_Qop: {
2253         IRQop* qop = ex->Iex.Qop.details;
2254         vassert(isIRAtom(qop->arg1));
2255         vassert(isIRAtom(qop->arg2));
2256         vassert(isIRAtom(qop->arg3));
2257         vassert(isIRAtom(qop->arg4));
2258         return IRExpr_Qop(
2259                   qop->op,
2260                   subst_Expr(env, qop->arg1),
2261                   subst_Expr(env, qop->arg2),
2262                   subst_Expr(env, qop->arg3),
2263                   subst_Expr(env, qop->arg4)
2264                );
2265      }
2266
2267      case Iex_Triop: {
2268         IRTriop* triop = ex->Iex.Triop.details;
2269         vassert(isIRAtom(triop->arg1));
2270         vassert(isIRAtom(triop->arg2));
2271         vassert(isIRAtom(triop->arg3));
2272         return IRExpr_Triop(
2273                   triop->op,
2274                   subst_Expr(env, triop->arg1),
2275                   subst_Expr(env, triop->arg2),
2276                   subst_Expr(env, triop->arg3)
2277                );
2278      }
2279
2280      case Iex_Binop:
2281         vassert(isIRAtom(ex->Iex.Binop.arg1));
2282         vassert(isIRAtom(ex->Iex.Binop.arg2));
2283         return IRExpr_Binop(
2284                   ex->Iex.Binop.op,
2285                   subst_Expr(env, ex->Iex.Binop.arg1),
2286                   subst_Expr(env, ex->Iex.Binop.arg2)
2287                );
2288
2289      case Iex_Unop:
2290         vassert(isIRAtom(ex->Iex.Unop.arg));
2291         return IRExpr_Unop(
2292                   ex->Iex.Unop.op,
2293                   subst_Expr(env, ex->Iex.Unop.arg)
2294                );
2295
2296      case Iex_Load:
2297         vassert(isIRAtom(ex->Iex.Load.addr));
2298         return IRExpr_Load(
2299                   ex->Iex.Load.end,
2300                   ex->Iex.Load.ty,
2301                   subst_Expr(env, ex->Iex.Load.addr)
2302                );
2303
2304      case Iex_CCall: {
2305         Int      i;
2306         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
2307         for (i = 0; args2[i]; i++) {
2308            vassert(isIRAtom(args2[i]));
2309            args2[i] = subst_Expr(env, args2[i]);
2310         }
2311         return IRExpr_CCall(
2312                   ex->Iex.CCall.cee,
2313                   ex->Iex.CCall.retty,
2314                   args2
2315                );
2316      }
2317
2318      case Iex_Mux0X:
2319         vassert(isIRAtom(ex->Iex.Mux0X.cond));
2320         vassert(isIRAtom(ex->Iex.Mux0X.expr0));
2321         vassert(isIRAtom(ex->Iex.Mux0X.exprX));
2322         return IRExpr_Mux0X(
2323                   subst_Expr(env, ex->Iex.Mux0X.cond),
2324                   subst_Expr(env, ex->Iex.Mux0X.expr0),
2325                   subst_Expr(env, ex->Iex.Mux0X.exprX)
2326                );
2327
2328      default:
2329         vex_printf("\n\n"); ppIRExpr(ex);
2330         vpanic("subst_Expr");
2331
2332   }
2333}
2334
2335
2336/* Apply the subst to stmt, then fold the result as much as possible.
2337   Much simplified due to stmt being previously flattened.  As a
2338   result of this, the stmt may wind up being turned into a no-op.
2339*/
2340static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
2341{
2342#  if 0
2343   vex_printf("\nsubst and fold stmt\n");
2344   ppIRStmt(st);
2345   vex_printf("\n");
2346#  endif
2347
2348   switch (st->tag) {
2349      case Ist_AbiHint:
2350         vassert(isIRAtom(st->Ist.AbiHint.base));
2351         vassert(isIRAtom(st->Ist.AbiHint.nia));
2352         return IRStmt_AbiHint(
2353                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
2354                   st->Ist.AbiHint.len,
2355                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
2356                );
2357      case Ist_Put:
2358         vassert(isIRAtom(st->Ist.Put.data));
2359         return IRStmt_Put(
2360                   st->Ist.Put.offset,
2361                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
2362                );
2363
2364      case Ist_PutI: {
2365         IRPutI *puti, *puti2;
2366         puti = st->Ist.PutI.details;
2367         vassert(isIRAtom(puti->ix));
2368         vassert(isIRAtom(puti->data));
2369         puti2 = mkIRPutI(puti->descr,
2370                          fold_Expr(env, subst_Expr(env, puti->ix)),
2371                          puti->bias,
2372                          fold_Expr(env, subst_Expr(env, puti->data)));
2373         return IRStmt_PutI(puti2);
2374      }
2375
2376      case Ist_WrTmp:
2377         /* This is the one place where an expr (st->Ist.WrTmp.data) is
2378            allowed to be more than just a constant or a tmp. */
2379         return IRStmt_WrTmp(
2380                   st->Ist.WrTmp.tmp,
2381                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
2382                );
2383
2384      case Ist_Store:
2385         vassert(isIRAtom(st->Ist.Store.addr));
2386         vassert(isIRAtom(st->Ist.Store.data));
2387         return IRStmt_Store(
2388                   st->Ist.Store.end,
2389                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
2390                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
2391                );
2392
2393      case Ist_CAS: {
2394         IRCAS *cas, *cas2;
2395         cas = st->Ist.CAS.details;
2396         vassert(isIRAtom(cas->addr));
2397         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
2398         vassert(isIRAtom(cas->expdLo));
2399         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
2400         vassert(isIRAtom(cas->dataLo));
2401         cas2 = mkIRCAS(
2402                   cas->oldHi, cas->oldLo, cas->end,
2403                   fold_Expr(env, subst_Expr(env, cas->addr)),
2404                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
2405                               : NULL,
2406                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
2407                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
2408                               : NULL,
2409                   fold_Expr(env, subst_Expr(env, cas->dataLo))
2410                );
2411         return IRStmt_CAS(cas2);
2412      }
2413
2414      case Ist_LLSC:
2415         vassert(isIRAtom(st->Ist.LLSC.addr));
2416         if (st->Ist.LLSC.storedata)
2417            vassert(isIRAtom(st->Ist.LLSC.storedata));
2418         return IRStmt_LLSC(
2419                   st->Ist.LLSC.end,
2420                   st->Ist.LLSC.result,
2421                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
2422                   st->Ist.LLSC.storedata
2423                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
2424                      : NULL
2425                );
2426
2427      case Ist_Dirty: {
2428         Int     i;
2429         IRDirty *d, *d2;
2430         d = st->Ist.Dirty.details;
2431         d2 = emptyIRDirty();
2432         *d2 = *d;
2433         d2->args = shallowCopyIRExprVec(d2->args);
2434         if (d2->mFx != Ifx_None) {
2435            vassert(isIRAtom(d2->mAddr));
2436            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
2437         }
2438         vassert(isIRAtom(d2->guard));
2439         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
2440         for (i = 0; d2->args[i]; i++) {
2441            vassert(isIRAtom(d2->args[i]));
2442            d2->args[i] = fold_Expr(env, subst_Expr(env, d2->args[i]));
2443         }
2444         return IRStmt_Dirty(d2);
2445      }
2446
2447      case Ist_IMark:
2448         return IRStmt_IMark(st->Ist.IMark.addr,
2449                             st->Ist.IMark.len,
2450                             st->Ist.IMark.delta);
2451
2452      case Ist_NoOp:
2453         return IRStmt_NoOp();
2454
2455      case Ist_MBE:
2456         return IRStmt_MBE(st->Ist.MBE.event);
2457
2458      case Ist_Exit: {
2459         IRExpr* fcond;
2460         vassert(isIRAtom(st->Ist.Exit.guard));
2461         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
2462         if (fcond->tag == Iex_Const) {
2463            /* Interesting.  The condition on this exit has folded down to
2464               a constant. */
2465            vassert(fcond->Iex.Const.con->tag == Ico_U1);
2466            vassert(fcond->Iex.Const.con->Ico.U1 == False
2467                    || fcond->Iex.Const.con->Ico.U1 == True);
2468            if (fcond->Iex.Const.con->Ico.U1 == False) {
2469               /* exit is never going to happen, so dump the statement. */
2470               return IRStmt_NoOp();
2471            } else {
2472               vassert(fcond->Iex.Const.con->Ico.U1 == True);
2473               /* Hmmm.  The exit has become unconditional.  Leave it
2474                  as it is for now, since we'd have to truncate the BB
2475                  at this point, which is tricky.  Such truncation is
2476                  done later by the dead-code elimination pass. */
2477               /* fall out into the reconstruct-the-exit code. */
2478               if (vex_control.iropt_verbosity > 0)
2479                  /* really a misuse of vex_control.iropt_verbosity */
2480                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2481            }
2482         }
2483         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
2484                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
2485      }
2486
2487   default:
2488      vex_printf("\n"); ppIRStmt(st);
2489      vpanic("subst_and_fold_Stmt");
2490   }
2491}
2492
2493
/* Constant/copy propagation and folding over one IRSB.  Builds a
   fresh output block: each statement of 'in' has the running
   tmp -> IRExpr substitution applied and is then constant-folded
   (via subst_and_fold_Stmt).  WrTmp bindings are added to the
   substitution as they are met; trivial bindings (t = t2, and
   t = const except F64i) are elided from the output, since the
   substitution makes them redundant. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   /* env[t] is the expression to substitute for IRTemp t, or NULL
      if t is not (yet) bound. */
   IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs. The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* If the statement has been folded into a no-op, forget it. */
      if (st2->tag == Ist_NoOp) continue;

      /* If the statement assigns to an IRTemp add it to the running
         environment. This is for the benefit of copy propagation
         and to allow sameIRExpr look through IRTemps. */
      if (st2->tag == Ist_WrTmp) {
         /* SSA form: each tmp must be assigned at most once. */
         vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
         env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

         /* 't1 = t2' -- don't add to BB; will be optimized out */
         if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) continue;

         /* 't = const' && 'const != F64i' -- don't add to BB
            Note, we choose not to propagate const when const is an
            F64i, so that F64i literals can be CSE'd later.  This helps
            x86 floating point code generation. */
         if (st2->Ist.WrTmp.data->tag == Iex_Const
             && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) continue;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#endif

   /* The block-end expression gets the substitution applied too
      (substitution only; no folding is done on it here). */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}
2564
2565
2566/*---------------------------------------------------------------*/
2567/*--- Dead code (t = E) removal                               ---*/
2568/*---------------------------------------------------------------*/
2569
2570/* As a side effect, also removes all code following an unconditional
2571   side exit. */
2572
2573/* The type of the HashHW map is: a map from IRTemp to nothing
2574   -- really just operating a set or IRTemps.
2575*/
2576
/* Mark 'tmp' as used, in the use-set 'set' (a Bool array indexed
   by IRTemp). */
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   set[(Int)tmp] = True;
}
2582
2583static void addUses_Expr ( Bool* set, IRExpr* e )
2584{
2585   Int i;
2586   switch (e->tag) {
2587      case Iex_GetI:
2588         addUses_Expr(set, e->Iex.GetI.ix);
2589         return;
2590      case Iex_Mux0X:
2591         addUses_Expr(set, e->Iex.Mux0X.cond);
2592         addUses_Expr(set, e->Iex.Mux0X.expr0);
2593         addUses_Expr(set, e->Iex.Mux0X.exprX);
2594         return;
2595      case Iex_CCall:
2596         for (i = 0; e->Iex.CCall.args[i]; i++)
2597            addUses_Expr(set, e->Iex.CCall.args[i]);
2598         return;
2599      case Iex_Load:
2600         addUses_Expr(set, e->Iex.Load.addr);
2601         return;
2602      case Iex_Qop:
2603         addUses_Expr(set, e->Iex.Qop.details->arg1);
2604         addUses_Expr(set, e->Iex.Qop.details->arg2);
2605         addUses_Expr(set, e->Iex.Qop.details->arg3);
2606         addUses_Expr(set, e->Iex.Qop.details->arg4);
2607         return;
2608      case Iex_Triop:
2609         addUses_Expr(set, e->Iex.Triop.details->arg1);
2610         addUses_Expr(set, e->Iex.Triop.details->arg2);
2611         addUses_Expr(set, e->Iex.Triop.details->arg3);
2612         return;
2613      case Iex_Binop:
2614         addUses_Expr(set, e->Iex.Binop.arg1);
2615         addUses_Expr(set, e->Iex.Binop.arg2);
2616         return;
2617      case Iex_Unop:
2618         addUses_Expr(set, e->Iex.Unop.arg);
2619         return;
2620      case Iex_RdTmp:
2621         addUses_Temp(set, e->Iex.RdTmp.tmp);
2622         return;
2623      case Iex_Const:
2624      case Iex_Get:
2625         return;
2626      default:
2627         vex_printf("\n");
2628         ppIRExpr(e);
2629         vpanic("addUses_Expr");
2630   }
2631}
2632
2633static void addUses_Stmt ( Bool* set, IRStmt* st )
2634{
2635   Int      i;
2636   IRDirty* d;
2637   IRCAS*   cas;
2638   switch (st->tag) {
2639      case Ist_AbiHint:
2640         addUses_Expr(set, st->Ist.AbiHint.base);
2641         addUses_Expr(set, st->Ist.AbiHint.nia);
2642         return;
2643      case Ist_PutI:
2644         addUses_Expr(set, st->Ist.PutI.details->ix);
2645         addUses_Expr(set, st->Ist.PutI.details->data);
2646         return;
2647      case Ist_WrTmp:
2648         addUses_Expr(set, st->Ist.WrTmp.data);
2649         return;
2650      case Ist_Put:
2651         addUses_Expr(set, st->Ist.Put.data);
2652         return;
2653      case Ist_Store:
2654         addUses_Expr(set, st->Ist.Store.addr);
2655         addUses_Expr(set, st->Ist.Store.data);
2656         return;
2657      case Ist_CAS:
2658         cas = st->Ist.CAS.details;
2659         addUses_Expr(set, cas->addr);
2660         if (cas->expdHi)
2661            addUses_Expr(set, cas->expdHi);
2662         addUses_Expr(set, cas->expdLo);
2663         if (cas->dataHi)
2664            addUses_Expr(set, cas->dataHi);
2665         addUses_Expr(set, cas->dataLo);
2666         return;
2667      case Ist_LLSC:
2668         addUses_Expr(set, st->Ist.LLSC.addr);
2669         if (st->Ist.LLSC.storedata)
2670            addUses_Expr(set, st->Ist.LLSC.storedata);
2671         return;
2672      case Ist_Dirty:
2673         d = st->Ist.Dirty.details;
2674         if (d->mFx != Ifx_None)
2675            addUses_Expr(set, d->mAddr);
2676         addUses_Expr(set, d->guard);
2677         for (i = 0; d->args[i] != NULL; i++)
2678            addUses_Expr(set, d->args[i]);
2679         return;
2680      case Ist_NoOp:
2681      case Ist_IMark:
2682      case Ist_MBE:
2683         return;
2684      case Ist_Exit:
2685         addUses_Expr(set, st->Ist.Exit.guard);
2686         return;
2687      default:
2688         vex_printf("\n");
2689         ppIRStmt(st);
2690         vpanic("addUses_Stmt");
2691   }
2692}
2693
2694
2695/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
2696static Bool isZeroU1 ( IRExpr* e )
2697{
2698   return toBool( e->tag == Iex_Const
2699                  && e->Iex.Const.con->tag == Ico_U1
2700                  && e->Iex.Const.con->Ico.U1 == False );
2701}
2702
2703/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
2704static Bool isOneU1 ( IRExpr* e )
2705{
2706   return toBool( e->tag == Iex_Const
2707                  && e->Iex.Const.con->tag == Ico_U1
2708                  && e->Iex.Const.con->Ico.U1 == True );
2709}
2710
2711
2712/* Note, this destructively modifies the given IRSB. */
2713
2714/* Scan backwards through statements, carrying a set of IRTemps which
2715   are known to be used after the current point.  On encountering 't =
2716   E', delete the binding if it is not used.  Otherwise, add any temp
2717   uses to the set and keep on moving backwards.
2718
2719   As an enhancement, the first (backwards) pass searches for IR exits
2720   with always-taken conditions and notes the location of the earliest
2721   one in the block.  If any such are found, a second pass copies the
2722   exit destination and jump kind to the bb-end.  Then, the exit and
2723   all statements following it are turned into no-ops.
2724*/
2725
/* Dead-code removal, in place.  set[t] == True iff IRTemp t is read
   by some statement later in the scan (or by bb->next), so a WrTmp
   whose lhs is unmarked when reached can be deleted. */
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   Bool*   set = LibVEX_Alloc(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits */
      /* Since the scan is backwards, this finishes holding the
         index of the earliest always-taken exit in the block. */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* The exit's destination/jumpkind/offsIP become the block
         end; the exit itself and everything after it are nop'd. */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
2793
2794
2795/*---------------------------------------------------------------*/
2796/*--- Specialisation of helper function calls, in             ---*/
2797/*--- collaboration with the front end                        ---*/
2798/*---------------------------------------------------------------*/
2799
/* For each 't = CCall(...)' in bb, offer the call to the front end's
   specialiser 'specHelper' (which also gets a pointer to the stmt
   vector and the stmt's index, so it can inspect preceding
   statements).  If it returns a replacement expression, install it.
   If anything changed, re-flatten the block, in case the
   replacements are not flat. */
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   /* Walk backwards over the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      /* Only 't = CCall(..)' statements are candidates. */
      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
        /* the front end can't think of a suitable replacement */
        continue;

      /* We got something better.  Install it in the bb. */
      any = True;
      bb->stmts[i]
         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);

      if (0) {
         vex_printf("SPEC: ");
         ppIRExpr(st->Ist.WrTmp.data);
         vex_printf("  -->  ");
         ppIRExpr(ex);
         vex_printf("\n");
      }
   }

   if (any)
      bb = flatten_BB(bb);
   return bb;
}
2843
2844
2845/*---------------------------------------------------------------*/
2846/*--- Determination of guest state aliasing relationships     ---*/
2847/*---------------------------------------------------------------*/
2848
2849/* These are helper functions for CSE and GetI/PutI transformations.
2850
2851   Determine, to the extent possible, the relationship between two
2852   guest state accesses.  The possible outcomes are:
2853
2854   * Exact alias.  These two accesses denote precisely the same
2855     piece of the guest state.
2856
2857   * Definitely no alias.  These two accesses are guaranteed not to
2858     overlap any part of the guest state.
2859
2860   * Unknown -- if neither of the above can be established.
2861
2862   If in doubt, return Unknown.  */
2863
typedef
   enum { ExactAlias,     /* provably the same guest state piece */
          NoAlias,        /* provably disjoint */
          UnknownAlias }  /* cannot tell; callers must assume the worst */
   GSAliasing;
2867
2868
2869/* Produces the alias relation between an indexed guest
2870   state access and a non-indexed access. */
2871
2872static
2873GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2874                                    Int offset2, IRType ty2 )
2875{
2876   UInt minoff1, maxoff1, minoff2, maxoff2;
2877
2878   getArrayBounds( descr1, &minoff1, &maxoff1 );
2879   minoff2 = offset2;
2880   maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2881
2882   if (maxoff1 < minoff2 || maxoff2 < minoff1)
2883      return NoAlias;
2884
2885   /* Could probably do better here if required.  For the moment
2886      however just claim not to know anything more. */
2887   return UnknownAlias;
2888}
2889
2890
2891/* Produces the alias relation between two indexed guest state
2892   accesses. */
2893
/* Produces the alias relation between two indexed guest state
   accesses, (descr1, ix1, bias1) vs (descr2, ix2, bias2). */
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   /* Shift both biases into non-negative territory by whole
      multiples of nElems, so that % below is well behaved.  The
      iteration cap guards against absurdly negative biases. */
   iters = 0;
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
2956
2957
2958/*---------------------------------------------------------------*/
2959/*--- Common Subexpression Elimination                        ---*/
2960/*---------------------------------------------------------------*/
2961
2962/* Expensive in time and space. */
2963
2964/* Uses two environments:
2965   a IRTemp -> IRTemp mapping
2966   a mapping from AvailExpr* to IRTemp
2967*/
2968
/* Either an IRTemp or an IRConst -- the two atom forms that appear
   as arguments of a (flattened) clean helper call. */
typedef
   struct {
      enum { TCc, TCt } tag;                /* TCc = const, TCt = tmp */
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
2975
2976static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
2977{
2978   if (tc1->tag != tc2->tag)
2979      return False;
2980   switch (tc1->tag) {
2981      case TCc:
2982         return eqIRConst(tc1->u.con, tc2->u.con);
2983      case TCt:
2984         return tc1->u.tmp == tc2->u.tmp;
2985      default:
2986         vpanic("eqTmpOrConst");
2987   }
2988}
2989
2990static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
2991{
2992   Bool eq = cee1->addr == cee2->addr;
2993   if (eq) {
2994      vassert(cee1->regparms == cee2->regparms);
2995      vassert(cee1->mcx_mask == cee2->mcx_mask);
2996      /* Names should be the same too, but we don't bother to
2997         check. */
2998   }
2999   return eq;
3000}
3001
3002/* Convert a NULL terminated IRExpr* vector to an array of
3003   TmpOrConsts, and a length. */
3004static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3005                                       /*OUT*/Int* nOuts,
3006                                       IRExpr** ins )
3007{
3008   Int i, n;
3009   /* We have to make two passes, one to count, one to copy. */
3010   for (n = 0; ins[n]; n++)
3011      ;
3012   *outs  = LibVEX_Alloc(n * sizeof(TmpOrConst));
3013   *nOuts = n;
3014   /* and now copy .. */
3015   for (i = 0; i < n; i++) {
3016      IRExpr*       arg = ins[i];
3017      TmpOrConst* dst = &(*outs)[i];
3018      if (arg->tag == Iex_RdTmp) {
3019         dst->tag   = TCt;
3020         dst->u.tmp = arg->Iex.RdTmp.tmp;
3021      }
3022      else if (arg->tag == Iex_Const) {
3023         dst->tag   = TCc;
3024         dst->u.con = arg->Iex.Const.con;
3025      }
3026      else {
3027         /* Failure of this is serious; it means that the presented arg
3028            isn't an IR atom, as it should be. */
3029         vpanic("irExprVec_to_TmpOrConsts");
3030      }
3031   }
3032}
3033
/* One available expression, for CSE.  Each form is normalised so
   that every sub-expression is an IRTemp or an IRConst, which
   keeps hashing and equality testing cheap. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt, CCall } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;    /* note: stored by value, not pointer */
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;    /* note: stored by value, not pointer */
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* Mux0X(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e0;
            IRTemp eX;
         } Mttt;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
      } u;
   }
   AvailExpr;
3087
/* Structural equality on AvailExprs.  Used when probing the CSE
   hash table for a match. */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   /* Differing tags are hinted as the common case. */
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         /* con2 is stored by value, hence the address-of. */
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Mttt:
         return toBool(a1->u.Mttt.co == a2->u.Mttt.co
                       && a1->u.Mttt.e0 == a2->u.Mttt.e0
                       && a1->u.Mttt.eX == a2->u.Mttt.eX);
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         /* Same callee, same number of args, and all args equal
            pairwise. */
         Int  i, n;
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         /* Equal calls must have equal return types. */
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      default: vpanic("eq_AvailExpr");
   }
}
3142
/* Reconstruct a fresh IRExpr tree from an AvailExpr.  This is the
   inverse of irExpr_to_AvailExpr, used when a CSE hit rewrites a
   statement. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst* con;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         /* The const is stored by value inside the AvailExpr, so a
            heap copy must be made for the IRExpr to point at. */
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Mttt:
         return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
                             IRExpr_RdTmp(ae->u.Mttt.e0),
                             IRExpr_RdTmp(ae->u.Mttt.eX));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         /* Rebuild the NULL-terminated args vector. */
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         IRExpr** vec = LibVEX_Alloc((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            TmpOrConst* tc = &ae->u.CCall.args[i];
            if (tc->tag == TCc) {
               vec[i] = IRExpr_Const(tc->u.con);
            }
            else if (tc->tag == TCt) {
               vec[i] = IRExpr_RdTmp(tc->u.tmp);
            }
            else vpanic("availExpr_to_IRExpr:CCall-arg");
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
3198
3199inline
3200static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3201{
3202   HWord res;
3203   /* env :: IRTemp -> IRTemp */
3204   if (lookupHHW( env, &res, (HWord)tmp ))
3205      return (IRTemp)res;
3206   else
3207      return tmp;
3208}
3209
3210static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
3211{
3212   /* env :: IRTemp -> IRTemp */
3213   switch (ae->tag) {
3214      case Ut:
3215         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
3216         break;
3217      case Btt:
3218         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
3219         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
3220         break;
3221      case Btc:
3222         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
3223         break;
3224      case Bct:
3225         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
3226         break;
3227      case Cf64i:
3228         break;
3229      case Mttt:
3230         ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
3231         ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
3232         ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
3233         break;
3234      case GetIt:
3235         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
3236         break;
3237      case CCall: {
3238         Int i, n = ae->u.CCall.nArgs;;
3239         for (i = 0; i < n; i++) {
3240            TmpOrConst* tc = &ae->u.CCall.args[i];
3241            if (tc->tag == TCt) {
3242               tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3243            }
3244         }
3245         break;
3246      }
3247      default:
3248         vpanic("subst_AvailExpr");
3249   }
3250}
3251
/* Try to convert expression 'e' into an AvailExpr.  Returns NULL if
   'e' does not match any of the recognised CSE-able shapes (all of
   which require the sub-expressions to be atoms). */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
{
   AvailExpr* ae;

   /* unop(tmp) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag      = Ut;
      ae->u.Ut.op  = e->Iex.Unop.op;
      ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
      return ae;
   }

   /* binop(tmp,tmp) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.arg1->tag == Iex_RdTmp
       && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag        = Btt;
      ae->u.Btt.op   = e->Iex.Binop.op;
      ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
      ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
      return ae;
   }

   /* binop(tmp,const) -- note the const is copied by value */
   if (e->tag == Iex_Binop
      && e->Iex.Binop.arg1->tag == Iex_RdTmp
      && e->Iex.Binop.arg2->tag == Iex_Const) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag        = Btc;
      ae->u.Btc.op   = e->Iex.Binop.op;
      ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
      ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
      return ae;
   }

   /* binop(const,tmp) */
   if (e->tag == Iex_Binop
      && e->Iex.Binop.arg1->tag == Iex_Const
      && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag        = Bct;
      ae->u.Bct.op   = e->Iex.Binop.op;
      ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
      ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
      return ae;
   }

   /* F64i literal */
   if (e->tag == Iex_Const
       && e->Iex.Const.con->tag == Ico_F64i) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag          = Cf64i;
      ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
      return ae;
   }

   /* Mux0X(tmp,tmp,tmp) */
   if (e->tag == Iex_Mux0X
       && e->Iex.Mux0X.cond->tag == Iex_RdTmp
       && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
       && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag       = Mttt;
      ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
      ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
      ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
      return ae;
   }

   /* GetI(descr,tmp,bias) */
   if (e->tag == Iex_GetI
       && e->Iex.GetI.ix->tag == Iex_RdTmp) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag           = GetIt;
      ae->u.GetIt.descr = e->Iex.GetI.descr;
      ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
      ae->u.GetIt.bias  = e->Iex.GetI.bias;
      return ae;
   }

   /* Clean helper call */
   if (e->tag == Iex_CCall) {
      ae = LibVEX_Alloc(sizeof(AvailExpr));
      ae->tag = CCall;
      /* Ok to share only the cee, since it is immutable. */
      ae->u.CCall.cee   = e->Iex.CCall.cee;
      ae->u.CCall.retty = e->Iex.CCall.retty;
      /* irExprVec_to_TmpOrConsts will assert if the args are
         neither tmps nor constants, but that's ok .. that's all they
         should be. */
      irExprVec_to_TmpOrConsts(
         &ae->u.CCall.args, &ae->u.CCall.nArgs,
         e->Iex.CCall.args
      );
      return ae;
   }

   return NULL;
}
3346
3347
3348/* The BB is modified in-place.  Returns True if any changes were
3349   made. */
3350
static Bool do_cse_BB ( IRSB* bb )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   /* tenv records tmp-to-tmp renamings created by earlier CSE hits;
      aenv records which available expressions are currently bound to
      which tmps. */
   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* Both maps store IRTemps in HWord-sized slots; check that fits. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the 5 AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI kind.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            /* Only GetI bindings can be invalidated by guest-state
               writes; the other AvailExpr forms are pure. */
            if (ae->tag != GetIt)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               /* Dirty/Store/MBE/CAS/LLSC: assume the worst. */
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
3500
3501
3502/*---------------------------------------------------------------*/
3503/*--- Add32/Sub32 chain collapsing                            ---*/
3504/*---------------------------------------------------------------*/
3505
3506/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
3507
3508/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
3509   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
3510   root node is Add32, not Sub32. */
3511
3512static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
3513{
3514   if (e->tag != Iex_Binop)
3515      return False;
3516   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
3517      return False;
3518   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
3519      return False;
3520   if (e->Iex.Binop.arg2->tag != Iex_Const)
3521      return False;
3522   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3523   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
3524   if (e->Iex.Binop.op == Iop_Sub32)
3525      *i32 = -*i32;
3526   return True;
3527}
3528
3529
3530/* Figure out if tmp can be expressed as tmp2 +32 const, for some
3531   other tmp2.  Scan backwards from the specified start point -- an
3532   optimisation. */
3533
3534static Bool collapseChain ( IRSB* bb, Int startHere,
3535                            IRTemp tmp,
3536                            IRTemp* tmp2, Int* i32 )
3537{
3538   Int     j, ii;
3539   IRTemp  vv;
3540   IRStmt* st;
3541   IRExpr* e;
3542
3543   /* the (var, con) pair contain the current 'representation' for
3544      'tmp'.  We start with 'tmp + 0'.  */
3545   IRTemp var = tmp;
3546   Int    con = 0;
3547
3548   /* Scan backwards to see if tmp can be replaced by some other tmp
3549     +/- a constant. */
3550   for (j = startHere; j >= 0; j--) {
3551      st = bb->stmts[j];
3552      if (st->tag != Ist_WrTmp)
3553         continue;
3554      if (st->Ist.WrTmp.tmp != var)
3555         continue;
3556      e = st->Ist.WrTmp.data;
3557      if (!isAdd32OrSub32(e, &vv, &ii))
3558         break;
3559      var = vv;
3560      con += ii;
3561   }
3562   if (j == -1)
3563      /* no earlier binding for var .. ill-formed IR */
3564      vpanic("collapseChain");
3565
3566   /* so, did we find anything interesting? */
3567   if (var == tmp)
3568      return False; /* no .. */
3569
3570   *tmp2 = var;
3571   *i32  = con;
3572   return True;
3573}
3574
3575
3576/* ------- Main function for Add32/Sub32 chain collapsing ------ */
3577
3578static void collapse_AddSub_chains_BB ( IRSB* bb )
3579{
3580   IRStmt *st;
3581   IRTemp var, var2;
3582   Int    i, con, con2;
3583
3584   for (i = bb->stmts_used-1; i >= 0; i--) {
3585      st = bb->stmts[i];
3586      if (st->tag == Ist_NoOp)
3587         continue;
3588
3589      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
3590
3591      if (st->tag == Ist_WrTmp
3592          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
3593
3594         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
3595            Find out if var can be expressed as var2 + con2. */
3596         if (collapseChain(bb, i-1, var, &var2, &con2)) {
3597            if (DEBUG_IROPT) {
3598               vex_printf("replacing1 ");
3599               ppIRStmt(st);
3600               vex_printf(" with ");
3601            }
3602            con2 += con;
3603            bb->stmts[i]
3604               = IRStmt_WrTmp(
3605                    st->Ist.WrTmp.tmp,
3606                    (con2 >= 0)
3607                      ? IRExpr_Binop(Iop_Add32,
3608                                     IRExpr_RdTmp(var2),
3609                                     IRExpr_Const(IRConst_U32(con2)))
3610                      : IRExpr_Binop(Iop_Sub32,
3611                                     IRExpr_RdTmp(var2),
3612                                     IRExpr_Const(IRConst_U32(-con2)))
3613                 );
3614            if (DEBUG_IROPT) {
3615               ppIRStmt(bb->stmts[i]);
3616               vex_printf("\n");
3617            }
3618         }
3619
3620         continue;
3621      }
3622
3623      /* Try to collapse 't1 = GetI[t2, con]'. */
3624
3625      if (st->tag == Ist_WrTmp
3626          && st->Ist.WrTmp.data->tag == Iex_GetI
3627          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
3628          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
3629                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
3630         if (DEBUG_IROPT) {
3631            vex_printf("replacing3 ");
3632            ppIRStmt(st);
3633            vex_printf(" with ");
3634         }
3635         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
3636         bb->stmts[i]
3637            = IRStmt_WrTmp(
3638                 st->Ist.WrTmp.tmp,
3639                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
3640                             IRExpr_RdTmp(var2),
3641                             con2));
3642         if (DEBUG_IROPT) {
3643            ppIRStmt(bb->stmts[i]);
3644            vex_printf("\n");
3645         }
3646         continue;
3647      }
3648
3649      /* Perhaps st is PutI[t, con] ? */
3650      IRPutI *puti = st->Ist.PutI.details;
3651      if (st->tag == Ist_PutI
3652          && puti->ix->tag == Iex_RdTmp
3653          && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
3654                               &var2, &con2)) {
3655         if (DEBUG_IROPT) {
3656            vex_printf("replacing2 ");
3657            ppIRStmt(st);
3658            vex_printf(" with ");
3659         }
3660         con2 += puti->bias;
3661         bb->stmts[i]
3662            = IRStmt_PutI(mkIRPutI(puti->descr,
3663                                   IRExpr_RdTmp(var2),
3664                                   con2,
3665                                   puti->data));
3666         if (DEBUG_IROPT) {
3667            ppIRStmt(bb->stmts[i]);
3668            vex_printf("\n");
3669         }
3670         continue;
3671      }
3672
3673   } /* for */
3674}
3675
3676
3677/*---------------------------------------------------------------*/
3678/*--- PutI/GetI transformations                               ---*/
3679/*---------------------------------------------------------------*/
3680
3681/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
3682   the given starting point to find, if any, a PutI which writes
3683   exactly the same piece of guest state, and so return the expression
3684   that the PutI writes.  This is the core of PutI-GetI forwarding. */
3685
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any.  Any statement that
      might clobber the sought guest-state slot without exactly
      matching it forces a NULL (give-up) result. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
3777
3778
3779
3780/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3781   that it writes exactly the same piece of guest state) ?  Safe
3782   answer: False. */
3783
3784static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3785{
3786   vassert(pi->tag == Ist_PutI);
3787   if (s2->tag != Ist_PutI)
3788      return False;
3789
3790   IRPutI *p1 = pi->Ist.PutI.details;
3791   IRPutI *p2 = s2->Ist.PutI.details;
3792
3793   return toBool(
3794          getAliasingRelation_II(
3795             p1->descr, p1->ix, p1->bias,
3796             p2->descr, p2->ix, p2->bias
3797          )
3798          == ExactAlias
3799          );
3800}
3801
3802
3803/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3804   overlap it?  Safe answer: True.  Note, we could do a lot better
3805   than this if needed. */
3806
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* NOTE(review): minoffP/maxoffP are computed but not consulted in
      this function as visible here -- presumably leftover; confirm
      before removing. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* No guest-state access at all. */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         /* Fixed-offset write vs indexed write: use IC aliasing. */
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         /* Indexed write vs indexed write: use II aliasing. */
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only Get/GetI on the rhs can read guest state. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Memory store: touches no guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
3906
3907
3908
3909/* ---------- PutI/GetI transformations main functions --------- */
3910
3911/* Remove redundant GetIs, to the extent that they can be detected.
3912   bb is modified in-place. */
3913
3914static
3915void do_redundant_GetI_elimination ( IRSB* bb )
3916{
3917   Int     i;
3918   IRStmt* st;
3919
3920   for (i = bb->stmts_used-1; i >= 0; i--) {
3921      st = bb->stmts[i];
3922      if (st->tag == Ist_NoOp)
3923         continue;
3924
3925      if (st->tag == Ist_WrTmp
3926          && st->Ist.WrTmp.data->tag == Iex_GetI
3927          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3928         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3929         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
3930         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
3931         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
3932         if (replacement
3933             && isIRAtom(replacement)
3934             /* Make sure we're doing a type-safe transformation! */
3935             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3936            if (DEBUG_IROPT) {
3937               vex_printf("rGI:  ");
3938               ppIRExpr(st->Ist.WrTmp.data);
3939               vex_printf(" -> ");
3940               ppIRExpr(replacement);
3941               vex_printf("\n");
3942            }
3943            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3944         }
3945      }
3946   }
3947
3948}
3949
3950
3951/* Remove redundant PutIs, to the extent which they can be detected.
3952   bb is modified in-place. */
3953
3954static
3955void do_redundant_PutI_elimination ( IRSB* bb )
3956{
3957   Int    i, j;
3958   Bool   delete;
3959   IRStmt *st, *stj;
3960
3961   vassert
3962      (vex_control.iropt_register_updates == VexRegUpdUnwindregsAtMemAccess
3963       || vex_control.iropt_register_updates == VexRegUpdAllregsAtMemAccess);
3964
3965   for (i = 0; i < bb->stmts_used; i++) {
3966      st = bb->stmts[i];
3967      if (st->tag != Ist_PutI)
3968         continue;
3969      /* Ok, search forwards from here to see if we can find another
3970         PutI which makes this one redundant, and dodging various
3971         hazards.  Search forwards:
3972         * If conditional exit, give up (because anything after that
3973           does not postdominate this put).
3974         * If a Get which might overlap, give up (because this PutI
3975           not necessarily dead).
3976         * If a Put which is identical, stop with success.
3977         * If a Put which might overlap, but is not identical, give up.
3978         * If a dirty helper call which might write guest state, give up.
3979         * If a Put which definitely doesn't overlap, or any other
3980           kind of stmt, continue.
3981      */
3982      delete = False;
3983      for (j = i+1; j < bb->stmts_used; j++) {
3984         stj = bb->stmts[j];
3985         if (stj->tag == Ist_NoOp)
3986            continue;
3987         if (identicalPutIs(st, stj)) {
3988            /* success! */
3989            delete = True;
3990            break;
3991         }
3992         if (stj->tag == Ist_Exit)
3993            /* give up */
3994            break;
3995         if (st->tag == Ist_Dirty)
3996            /* give up; could do better here */
3997            break;
3998         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
3999            /* give up */
4000           break;
4001      }
4002
4003      if (delete) {
4004         if (DEBUG_IROPT) {
4005            vex_printf("rPI:  ");
4006            ppIRStmt(st);
4007            vex_printf("\n");
4008         }
4009         bb->stmts[i] = IRStmt_NoOp();
4010      }
4011
4012   }
4013}
4014
4015
4016/*---------------------------------------------------------------*/
4017/*--- Loop unrolling                                          ---*/
4018/*---------------------------------------------------------------*/
4019
4020/* Adjust all tmp values (names) in e by delta.  e is destructively
4021   modified. */
4022
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   /* Recurse over the expression tree, adding 'delta' to every
      IRTemp name.  Leaves mentioning no temps (Get, Const) need no
      work. */
   switch (e->tag) {
      case Iex_RdTmp:
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_Mux0X:
         deltaIRExpr(e->Iex.Mux0X.cond, delta);
         deltaIRExpr(e->Iex.Mux0X.expr0, delta);
         deltaIRExpr(e->Iex.Mux0X.exprX, delta);
         break;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
4071
4072/* Adjust all tmp values (names) in st by delta.  st is destructively
4073   modified. */
4074
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   /* Rename every IRTemp in the stmt -- both temps it defines (WrTmp
      lhs, CAS old*, LLSC result, Dirty tmp) and temps read inside
      its expressions (via deltaIRExpr). */
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_CAS:
         /* oldHi/expdHi/dataHi are absent for single-width CAS. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata is NULL for a load-linked. */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         /* args is a NULL-terminated vector. */
         for (i = 0; d->args[i]; i++)
            deltaIRExpr(d->args[i], delta);
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
4139
4140
4141/* If possible, return a loop-unrolled version of bb0.  The original
4142   is changed.  If not possible, return NULL.  */
4143
4144/* The two schemas considered are:
4145
4146     X: BODY; goto X
4147
4148     which unrolls to (eg)  X: BODY;BODY; goto X
4149
4150   and
4151
4152       X: BODY; if (c) goto X; goto Y
4153   which trivially transforms to
4154       X: BODY; if (!c) goto Y; goto X;
4155   so it falls in the scope of the first case.
4156
4157   X and Y must be literal (guest) addresses.
4158*/
4159
4160static Int calc_unroll_factor( IRSB* bb )
4161{
4162   Int n_stmts, i;
4163
4164   n_stmts = 0;
4165   for (i = 0; i < bb->stmts_used; i++) {
4166      if (bb->stmts[i]->tag != Ist_NoOp)
4167         n_stmts++;
4168   }
4169
4170   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4171      if (vex_control.iropt_verbosity > 0)
4172         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4173                    n_stmts, 8* n_stmts);
4174      return 8;
4175   }
4176   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4177      if (vex_control.iropt_verbosity > 0)
4178         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4179                    n_stmts, 4* n_stmts);
4180      return 4;
4181   }
4182
4183   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4184      if (vex_control.iropt_verbosity > 0)
4185         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4186                    n_stmts, 2* n_stmts);
4187      return 2;
4188   }
4189
4190   if (vex_control.iropt_verbosity > 0)
4191      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4192
4193   return 1;
4194}
4195
4196
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* Unrolling disabled entirely? */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last non-NULL stmt.  (NOTE(review): this tests for a
      NULL slot, not Ist_NoOp -- presumably intentional; confirm.) */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Double the block jmax times: once for 2x, twice for 4x, three
      times for 8x. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      /* Clone the block so far, allocate a fresh set of temps in bb1
         mirroring the clone's types, then shift the clone's temp
         names up by n_vars and append its stmts to bb1. */
      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%llx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach.  */
   return flatten_BB(bb1);
}
4367
4368
4369/*---------------------------------------------------------------*/
4370/*--- The tree builder                                        ---*/
4371/*---------------------------------------------------------------*/
4372
4373/* This isn't part of IR optimisation.  Really it's a pass done prior
4374   to instruction selection, which improves the code that the
4375   instruction selector can produce. */
4376
4377/* --- The 'tmp' environment is the central data structure here --- */
4378
/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  A size in the region of 8-10
   gives reasonable performance under most circumstances. */
4384#define A_NENV 10
4385
4386/* bindee == NULL   ===  slot is not in use
4387   bindee != NULL   ===  slot is in use
4388*/
typedef
   struct {
      IRTemp  binder;   /* the temp bound by this slot */
      IRExpr* bindee;   /* bound expression, or NULL when slot is free */
      Bool    doesLoad; /* does bindee read memory (set by setHints_Expr)? */
      Bool    doesGet;  /* does bindee read guest state (Get/GetI)? */
   }
   ATmpInfo;
4397
4398__attribute__((unused))
4399static void ppAEnv ( ATmpInfo* env )
4400{
4401   Int i;
4402   for (i = 0; i < A_NENV; i++) {
4403      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
4404      if (env[i].bindee)
4405         ppIRExpr(env[i].bindee);
4406      else
4407         vex_printf("(null)");
4408      vex_printf("\n");
4409   }
4410}
4411
4412/* --- Tree-traversal fns --- */
4413
4414/* Traverse an expr, and detect if any part of it reads memory or does
4415   a Get.  Be careful ... this really controls how much the
4416   tree-builder can reorder the code, so getting it right is critical.
4417*/
4418static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
4419{
4420   Int i;
4421   switch (e->tag) {
4422      case Iex_CCall:
4423         for (i = 0; e->Iex.CCall.args[i]; i++)
4424            setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
4425         return;
4426      case Iex_Mux0X:
4427         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
4428         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
4429         setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
4430         return;
4431      case Iex_Qop:
4432         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg1);
4433         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg2);
4434         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg3);
4435         setHints_Expr(doesLoad, doesGet, e->Iex.Qop.details->arg4);
4436         return;
4437      case Iex_Triop:
4438         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg1);
4439         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg2);
4440         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.details->arg3);
4441         return;
4442      case Iex_Binop:
4443         setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
4444         setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
4445         return;
4446      case Iex_Unop:
4447         setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
4448         return;
4449      case Iex_Load:
4450         *doesLoad = True;
4451         setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
4452         return;
4453      case Iex_Get:
4454         *doesGet = True;
4455         return;
4456      case Iex_GetI:
4457         *doesGet = True;
4458         setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
4459         return;
4460      case Iex_RdTmp:
4461      case Iex_Const:
4462         return;
4463      default:
4464         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4465         vpanic("setHints_Expr");
4466   }
4467}
4468
4469
4470/* Add a binding to the front of the env and slide all the rest
4471   backwards.  It should be the case that the last slot is free. */
4472static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
4473{
4474   Int i;
4475   vassert(env[A_NENV-1].bindee == NULL);
4476   for (i = A_NENV-1; i >= 1; i--)
4477      env[i] = env[i-1];
4478   env[0].binder   = binder;
4479   env[0].bindee   = bindee;
4480   env[0].doesLoad = False; /* filled in later */
4481   env[0].doesGet  = False; /* filled in later */
4482}
4483
/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this expression
   to the counts in 'uses'.
*/
4488static void aoccCount_Expr ( UShort* uses, IRExpr* e )
4489{
4490   Int i;
4491
4492   switch (e->tag) {
4493
4494      case Iex_RdTmp: /* the only interesting case */
4495         uses[e->Iex.RdTmp.tmp]++;
4496         return;
4497
4498      case Iex_Mux0X:
4499         aoccCount_Expr(uses, e->Iex.Mux0X.cond);
4500         aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
4501         aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
4502         return;
4503
4504      case Iex_Qop:
4505         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
4506         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
4507         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
4508         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
4509         return;
4510
4511      case Iex_Triop:
4512         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
4513         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
4514         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
4515         return;
4516
4517      case Iex_Binop:
4518         aoccCount_Expr(uses, e->Iex.Binop.arg1);
4519         aoccCount_Expr(uses, e->Iex.Binop.arg2);
4520         return;
4521
4522      case Iex_Unop:
4523         aoccCount_Expr(uses, e->Iex.Unop.arg);
4524         return;
4525
4526      case Iex_Load:
4527         aoccCount_Expr(uses, e->Iex.Load.addr);
4528         return;
4529
4530      case Iex_CCall:
4531         for (i = 0; e->Iex.CCall.args[i]; i++)
4532            aoccCount_Expr(uses, e->Iex.CCall.args[i]);
4533         return;
4534
4535      case Iex_GetI:
4536         aoccCount_Expr(uses, e->Iex.GetI.ix);
4537         return;
4538
4539      case Iex_Const:
4540      case Iex_Get:
4541         return;
4542
4543      default:
4544         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4545         vpanic("aoccCount_Expr");
4546    }
4547}
4548
4549
/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this statement
   to the counts in 'uses'.
*/
4554static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
4555{
4556   Int      i;
4557   IRDirty* d;
4558   IRCAS*   cas;
4559   switch (st->tag) {
4560      case Ist_AbiHint:
4561         aoccCount_Expr(uses, st->Ist.AbiHint.base);
4562         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
4563         return;
4564      case Ist_WrTmp:
4565         aoccCount_Expr(uses, st->Ist.WrTmp.data);
4566         return;
4567      case Ist_Put:
4568         aoccCount_Expr(uses, st->Ist.Put.data);
4569         return;
4570      case Ist_PutI:
4571         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
4572         aoccCount_Expr(uses, st->Ist.PutI.details->data);
4573         return;
4574      case Ist_Store:
4575         aoccCount_Expr(uses, st->Ist.Store.addr);
4576         aoccCount_Expr(uses, st->Ist.Store.data);
4577         return;
4578      case Ist_CAS:
4579         cas = st->Ist.CAS.details;
4580         aoccCount_Expr(uses, cas->addr);
4581         if (cas->expdHi)
4582            aoccCount_Expr(uses, cas->expdHi);
4583         aoccCount_Expr(uses, cas->expdLo);
4584         if (cas->dataHi)
4585            aoccCount_Expr(uses, cas->dataHi);
4586         aoccCount_Expr(uses, cas->dataLo);
4587         return;
4588      case Ist_LLSC:
4589         aoccCount_Expr(uses, st->Ist.LLSC.addr);
4590         if (st->Ist.LLSC.storedata)
4591            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
4592         return;
4593      case Ist_Dirty:
4594         d = st->Ist.Dirty.details;
4595         if (d->mFx != Ifx_None)
4596            aoccCount_Expr(uses, d->mAddr);
4597         aoccCount_Expr(uses, d->guard);
4598         for (i = 0; d->args[i]; i++)
4599            aoccCount_Expr(uses, d->args[i]);
4600         return;
4601      case Ist_NoOp:
4602      case Ist_IMark:
4603      case Ist_MBE:
4604         return;
4605      case Ist_Exit:
4606         aoccCount_Expr(uses, st->Ist.Exit.guard);
4607         return;
4608      default:
4609         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4610         vpanic("aoccCount_Stmt");
4611   }
4612}
4613
4614/* Look up a binding for tmp in the env.  If found, return the bound
4615   expression, and set the env's binding to NULL so it is marked as
4616   used.  If not found, return NULL. */
4617
4618static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
4619{
4620   Int i;
4621   for (i = 0; i < A_NENV; i++) {
4622      if (env[i].binder == tmp && env[i].bindee != NULL) {
4623         IRExpr* bindee = env[i].bindee;
4624         env[i].bindee = NULL;
4625         return bindee;
4626      }
4627   }
4628   return NULL;
4629}
4630
4631/* Traverse e, looking for temps.  For each observed temp, see if env
4632   contains a binding for the temp, and if so return the bound value.
4633   The env has the property that any binding it holds is
4634   'single-shot', so once a binding is used, it is marked as no longer
4635   available, by setting its .bindee field to NULL. */
4636
4637static inline Bool is_Unop ( IRExpr* e, IROp op ) {
4638   return e->tag == Iex_Unop && e->Iex.Unop.op == op;
4639}
4640static inline Bool is_Binop ( IRExpr* e, IROp op ) {
4641   return e->tag == Iex_Binop && e->Iex.Binop.op == op;
4642}
4643
4644static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
4645{
4646   switch (op) {
4647   case Iop_Or32:
4648      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
4649      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
4650         return IRExpr_Unop( Iop_CmpwNEZ32,
4651                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
4652                                                     a2->Iex.Unop.arg ) );
4653      break;
4654
4655   case Iop_CmpNE32:
4656      /* Since X has type Ity_I1 we can simplify:
4657         CmpNE32(1Uto32(X),0)) ==> X */
4658      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
4659         return a1->Iex.Unop.arg;
4660      break;
4661
4662   default:
4663      break;
4664   }
4665   /* no reduction rule applies */
4666   return IRExpr_Binop( op, a1, a2 );
4667}
4668
/* Apply simple pattern-based reductions to Unop(op, aa), returning a
   reduced expression where a rule matches, else a freshly built
   IRExpr_Unop(op, aa).  Some rules recurse via fold_IRExpr_Unop, so
   chains of reducible nodes collapse completely.  Within a case, the
   rules are tried in order and the first match wins. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
                     --> Shr64( 32Uto64(64to32(x)), sh )) */
      /* The inner 32Uto64(64to32(..)) already zeroes the upper half,
         and Shr64 cannot reintroduce high bits, so the outer
         widen/narrow pair is redundant. */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
         --> 32Uto64(64to32( Shl64(                x,   sh )) */
      /* The outer 64to32 discards the high half anyway, so the shift
         operand does not need its own widen/narrow pair. */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
4798
4799static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
4800{
4801   IRExpr*  e2;
4802   IRExpr** args2;
4803   Int      i;
4804
4805   switch (e->tag) {
4806
4807      case Iex_CCall:
4808         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
4809         for (i = 0; args2[i]; i++)
4810            args2[i] = atbSubst_Expr(env,args2[i]);
4811         return IRExpr_CCall(
4812                   e->Iex.CCall.cee,
4813                   e->Iex.CCall.retty,
4814                   args2
4815                );
4816      case Iex_RdTmp:
4817         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
4818         return e2 ? e2 : e;
4819      case Iex_Mux0X:
4820         return IRExpr_Mux0X(
4821                   atbSubst_Expr(env, e->Iex.Mux0X.cond),
4822                   atbSubst_Expr(env, e->Iex.Mux0X.expr0),
4823                   atbSubst_Expr(env, e->Iex.Mux0X.exprX)
4824                );
4825      case Iex_Qop:
4826         return IRExpr_Qop(
4827                   e->Iex.Qop.details->op,
4828                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
4829                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
4830                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
4831                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
4832                );
4833      case Iex_Triop:
4834         return IRExpr_Triop(
4835                   e->Iex.Triop.details->op,
4836                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
4837                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
4838                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
4839                );
4840      case Iex_Binop:
4841         return fold_IRExpr_Binop(
4842                   e->Iex.Binop.op,
4843                   atbSubst_Expr(env, e->Iex.Binop.arg1),
4844                   atbSubst_Expr(env, e->Iex.Binop.arg2)
4845                );
4846      case Iex_Unop:
4847         return fold_IRExpr_Unop(
4848                   e->Iex.Unop.op,
4849                   atbSubst_Expr(env, e->Iex.Unop.arg)
4850                );
4851      case Iex_Load:
4852         return IRExpr_Load(
4853                   e->Iex.Load.end,
4854                   e->Iex.Load.ty,
4855                   atbSubst_Expr(env, e->Iex.Load.addr)
4856                );
4857      case Iex_GetI:
4858         return IRExpr_GetI(
4859                   e->Iex.GetI.descr,
4860                   atbSubst_Expr(env, e->Iex.GetI.ix),
4861                   e->Iex.GetI.bias
4862                );
4863      case Iex_Const:
4864      case Iex_Get:
4865         return e;
4866      default:
4867         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4868         vpanic("atbSubst_Expr");
4869   }
4870}
4871
4872/* Same deal as atbSubst_Expr, except for stmts. */
4873
/* Rebuild st with atbSubst_Expr applied to every expression field,
   'using up' env bindings for any temps referenced.  Non-expression
   fields (offsets, types, endianness, jump kinds, guest constants)
   are carried over unchanged. */
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         /* Only the guard is an expression; dst is a constant. */
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* The Hi halves are NULL for a single-width CAS. */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d; /* shallow copy, then substitute over the copy's fields */
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = atbSubst_Expr(env, d2->args[i]);
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
4961
/* Build expression trees for the instruction selector: every temp that
   is used exactly once has its defining expression substituted
   directly into the use site, subject to the load/get reordering
   constraints tracked in env.  bb is modified in place.  Returns the
   maximum guest address covered by the block's IMarks, or ~(Addr64)0
   if the block contains no IMark. */
/* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb )
{
   Int      i, j, k, m;
   Bool     stmtPuts, stmtStores, invalidateMe;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];   /* queue of pending single-use bindings */

   Bool   max_ga_known = False;
   Addr64 max_ga       = 0;

   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            /* mga = address of the last byte of this instruction;
               a len of 0 is treated as 1. */
            Int    len = st->Ist.IMark.len;
            Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
              (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
              by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* stmtPuts/stmtStores characterise what the stmt under
         consideration does, or might do (sidely safe @ True). */
      stmtPuts
         = toBool( st->tag == Ist_Put
                   || st->tag == Ist_PutI
                   || st->tag == Ist_Dirty );

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || st->tag == Ist_Dirty
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      /* Scan slots back-to-front, i.e. oldest binding first. */
      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data */
              || (env[k].doesGet && stmtPuts)
              /* a put invalidates loaded data.  Note, we could do
                 much better here in the sense that we only need to
                 invalidate trees containing loads if the Put in
                 question is marked as requiring precise
                 exceptions. */
              || (env[k].doesLoad && stmtPuts)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            /* Emit the binding as an ordinary assignment, before st2. */
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      /* ... and mark the remaining slots free.  (m carries over from
         the compaction loop above.) */
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr64)0;
}
5176
5177
5178/*---------------------------------------------------------------*/
5179/*--- iropt main                                              ---*/
5180/*---------------------------------------------------------------*/
5181
/* When True, each optimisation phase below dumps the IRSB it
   produced via ppIRSB.  Compile-time setting only; flip to the
   commented-out alternative to enable. */
static Bool iropt_verbose = False; /* True; */
5183
5184
5185/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
5186   redundant Put removal, constant propagation, dead code removal,
5187   clean helper specialisation, and dead code removal (again).
5188*/
5189
5190
5191static
5192IRSB* cheap_transformations (
5193         IRSB* bb,
5194         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
5195         Bool (*preciseMemExnsFn)(Int,Int)
5196      )
5197{
5198   redundant_get_removal_BB ( bb );
5199   if (iropt_verbose) {
5200      vex_printf("\n========= REDUNDANT GET\n\n" );
5201      ppIRSB(bb);
5202   }
5203
5204   if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) {
5205      redundant_put_removal_BB ( bb, preciseMemExnsFn );
5206   }
5207   if (iropt_verbose) {
5208      vex_printf("\n========= REDUNDANT PUT\n\n" );
5209      ppIRSB(bb);
5210   }
5211
5212   bb = cprop_BB ( bb );
5213   if (iropt_verbose) {
5214      vex_printf("\n========= CPROPD\n\n" );
5215      ppIRSB(bb);
5216   }
5217
5218   do_deadcode_BB ( bb );
5219   if (iropt_verbose) {
5220      vex_printf("\n========= DEAD\n\n" );
5221      ppIRSB(bb);
5222   }
5223
5224   bb = spec_helpers_BB ( bb, specHelper );
5225   do_deadcode_BB ( bb );
5226   if (iropt_verbose) {
5227      vex_printf("\n========= SPECd \n\n" );
5228      ppIRSB(bb);
5229   }
5230
5231   return bb;
5232}
5233
5234
5235/* Do some more expensive transformations on bb, which are aimed at
5236   optimising as much as possible in the presence of GetI and PutI.  */
5237
5238static
5239IRSB* expensive_transformations( IRSB* bb )
5240{
5241   (void)do_cse_BB( bb );
5242   collapse_AddSub_chains_BB( bb );
5243   do_redundant_GetI_elimination( bb );
5244   if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) {
5245      do_redundant_PutI_elimination( bb );
5246   }
5247   do_deadcode_BB( bb );
5248   return bb;
5249}
5250
5251
/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries

   As a side effect this also sanity-checks flatness: every position
   that must hold an atom (an IRTemp or a constant) is vassert'd to
   do so, and an unrecognised statement kind or temp type panics. */

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   /* Assume neither feature is present until proven otherwise. */
   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            /* Iex_GetI is only looked for here, as the rhs of an
               assignment -- flattening is assumed to leave it in no
               other position. */
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               /* Plain integer temps don't trigger anything. */
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               /* Binary FP and vector temps count as "V or F" ... */
               case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               /* ... and so do decimal FP temps. */
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_CAS:
            /* The Hi halves may legitimately be absent (single-width
               CAS); the Lo halves and address must be atoms. */
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            /* storedata is NULL for a load-linked. */
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++)
               vassert(isIRAtom(d->args[j]));
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            /* Unhandled statement kind, or (via the goto above) a
               temp of unexpected type: show it and give up. */
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
5340
5341
/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/


/* Optimise bb0 and return the resulting (possibly different) IRSB.

   specHelper       - architecture-specific specialiser for calls to
                      clean helper functions
   preciseMemExnsFn - says which guest state offsets need precise
                      exceptions; consulted by redundant-Put removal
   guest_addr       - guest address of the block; used by the loop
                      unroller
   guest_arch       - guest architecture; ARM gets an extra cleanup
                      round (Thumb2 translation produces much chaff)

   Overall plan: always flatten; at iropt_level >= 1 run the cheap
   passes; at level >= 2 inspect the block to decide whether the
   expensive (GetI/PutI-aware) passes or an extra CSE are worthwhile,
   then try to unroll simple single-block loops. */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int),
         Addr64 guest_addr,
         VexArch guest_arch
      )
{
   /* Counters feeding the DEBUG_IROPT statistics printout below.
      NOTE(review): static and unsynchronised -- assumes single-
      threaded translation. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (vex_control.iropt_register_updates != VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn );
      }
      do_cse_BB( bb );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb );
         bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         /* Potentially common up GetIs; if CSE found anything, clean
            up once more. */
         cses = do_cse_BB( bb );
         if (cses)
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
      }

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         /* Unrolling succeeded; re-run the cleanup pipeline on the
            (larger) unrolled block. */
         bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb );
            bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
5452
5453
5454/*---------------------------------------------------------------*/
5455/*--- end                                            ir_opt.c ---*/
5456/*---------------------------------------------------------------*/
5457