1
2/*---------------------------------------------------------------*/
3/*--- begin                                 host_reg_alloc2.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex.h"
38
39#include "main_util.h"
40#include "host_generic_regs.h"
41
42/* Set to 1 for lots of debugging output. */
43#define DEBUG_REGALLOC 0
44
45
46/* TODO 27 Oct 04:
47
48   Better consistency checking from what isMove tells us.
49
50   We can possibly do V-V coalescing even when the src is spilled,
51   providing we can arrange for the dst to have the same spill slot.
52
53   Note that state[].hreg is the same as the available real regs.
54
55   Generally rationalise data structures.  */
56
57
58/* Records information on virtual register live ranges.  Computed once
59   and remains unchanged after that. */
60typedef
61   struct {
62      /* Becomes live for the first time after this insn ... */
63      Short live_after;
64      /* Becomes dead for the last time before this insn ... */
65      Short dead_before;
66      /* The "home" spill slot, if needed.  Never changes. */
67      Short spill_offset;
68      Short spill_size;
69      /* What kind of register this is. */
70      HRegClass reg_class;
71   }
72   VRegLR;
73
74
75/* Records information on real-register live ranges.  Computed once
76   and remains unchanged after that. */
77typedef
78   struct {
79      HReg rreg;
80      /* Becomes live after this insn ... */
81      Short live_after;
82      /* Becomes dead before this insn ... */
83      Short dead_before;
84   }
85   RRegLR;
86
87
88/* An array of the following structs (rreg_state) comprises the
89   running state of the allocator.  It indicates what the current
90   disposition of each allocatable real register is.  The array gets
91   updated as the allocator processes instructions.  The identity of
92   the register is not recorded here, because the index of this
93   structure in doRegisterAllocation()'s |rreg_state| is the index
94   number of the register, and the register itself can be extracted
95   from the RRegUniverse supplied to doRegisterAllocation(). */
96typedef
97   struct {
98      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
99      /* Is this involved in any HLRs?  (only an optimisation hint) */
100      Bool has_hlrs;
101      /* ------ FIELDS WHICH DO CHANGE ------ */
102      /* 6 May 07: rearranged fields below so the whole struct fits
103         into 16 bytes on both x86 and amd64. */
104      /* Used when .disp == Bound and we are looking for vregs to
105         spill. */
106      Bool is_spill_cand;
107      /* Optimisation: used when .disp == Bound.  Indicates when the
108         rreg has the same value as the spill slot for the associated
109         vreg.  Is safely left at False, and becomes True after a
110         spill store or reload for this rreg. */
111      Bool eq_spill_slot;
112      /* What's it's current disposition? */
113      enum { Free,     /* available for use */
114             Unavail,  /* in a real-reg live range */
115             Bound     /* in use (holding value of some vreg) */
116           }
117           disp;
118      /* If .disp == Bound, what vreg is it bound to? */
119      HReg vreg;
120   }
121   RRegState;
122
123
124/* The allocator also maintains a redundant array of indexes
125   (vreg_state) from vreg numbers back to entries in rreg_state.  It
126   is redundant because iff vreg_state[i] == j then
127   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
128   point at each other.  The purpose of this is to speed up activities
129   which involve looking for a particular vreg: there is no need to
130   scan the rreg_state looking for it, just index directly into
131   vreg_state.  The FAQ "does this vreg already have an associated
132   rreg" is the main beneficiary.
133
134   To indicate, in vreg_state[i], that a given vreg is not currently
135   associated with any rreg, that entry can be set to INVALID_RREG_NO.
136
137   Because the vreg_state entries are signed Shorts, the max number
138   of vregs that can be handed by regalloc is 32767.
139*/
140
141#define INVALID_RREG_NO ((Short)(-1))
142
143#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
144#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
145
146
147/* Search forward from some given point in the incoming instruction
148   sequence.  Point is to select a virtual register to spill, by
149   finding the vreg which is mentioned as far ahead as possible, in
150   the hope that this will minimise the number of consequent reloads.
151
152   Only do the search for vregs which are Bound in the running state,
153   and for which the .is_spill_cand field is set.  This allows the
154   caller to arbitrarily restrict the set of spill candidates to be
155   considered.
156
157   To do this we don't actually need to see the incoming instruction
158   stream.  Rather, what we need us the HRegUsage records for the
159   incoming instruction stream.  Hence that is passed in.
160
161   Returns an index into the state array indicating the (v,r) pair to
162   spill, or -1 if none was found.  */
163static
164Int findMostDistantlyMentionedVReg (
165   HRegUsage*   reg_usages_in,
166   Int          search_from_instr,
167   Int          num_instrs,
168   RRegState*   state,
169   Int          n_state
170)
171{
172   Int k, m;
173   Int furthest_k = -1;
174   Int furthest   = -1;
175   vassert(search_from_instr >= 0);
176   for (k = 0; k < n_state; k++) {
177      if (!state[k].is_spill_cand)
178         continue;
179      vassert(state[k].disp == Bound);
180      for (m = search_from_instr; m < num_instrs; m++) {
181         if (HRegUsage__contains(&reg_usages_in[m], state[k].vreg))
182            break;
183      }
184      if (m > furthest) {
185         furthest   = m;
186         furthest_k = k;
187      }
188   }
189   return furthest_k;
190}
191
192
193/* Check that this vreg has been assigned a sane spill offset. */
194inline
195static void sanity_check_spill_offset ( VRegLR* vreg )
196{
197   switch (vreg->reg_class) {
198      case HRcVec128: case HRcFlt64:
199         vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
200      default:
201         vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
202   }
203}
204
205
206/* Double the size of the real-reg live-range array, if needed. */
207__attribute__((noinline))
208static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
209{
210   Int     k;
211   RRegLR* arr2;
212   if (0)
213      vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
214   vassert(used == *size);
215   arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
216   for (k = 0; k < *size; k++)
217      arr2[k] = (*info)[k];
218   *size *= 2;
219   *info = arr2;
220}
221inline
222static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
223{
224   if (LIKELY(used < *size)) return;
225   ensureRRLRspace_SLOW(info, size, used);
226}
227
228
229/* Sort an array of RRegLR entries by either the .live_after or
230   .dead_before fields.  This is performance-critical. */
231static void sortRRLRarray ( RRegLR* arr,
232                            Int size, Bool by_live_after )
233{
234   Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
235                       9841, 29524, 88573, 265720,
236                       797161, 2391484 };
237   Int    lo = 0;
238   Int    hi = size-1;
239   Int    i, j, h, bigN, hp;
240   RRegLR v;
241
242   vassert(size >= 0);
243   if (size == 0)
244      return;
245
246   bigN = hi - lo + 1; if (bigN < 2) return;
247   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
248
249   if (by_live_after) {
250
251      for ( ; hp >= 0; hp--) {
252         h = incs[hp];
253         for (i = lo + h; i <= hi; i++) {
254            v = arr[i];
255            j = i;
256            while (arr[j-h].live_after > v.live_after) {
257               arr[j] = arr[j-h];
258               j = j - h;
259               if (j <= (lo + h - 1)) break;
260            }
261            arr[j] = v;
262         }
263      }
264
265   } else {
266
267      for ( ; hp >= 0; hp--) {
268         h = incs[hp];
269         for (i = lo + h; i <= hi; i++) {
270            v = arr[i];
271            j = i;
272            while (arr[j-h].dead_before > v.dead_before) {
273               arr[j] = arr[j-h];
274               j = j - h;
275               if (j <= (lo + h - 1)) break;
276            }
277            arr[j] = v;
278         }
279      }
280
281   }
282}
283
284
285/* Compute the index of the highest and lowest 1 in a ULong,
286   respectively.  Results are undefined if the argument is zero.
287   Don't pass it zero :) */
288static inline UInt ULong__maxIndex ( ULong w64 ) {
289   return 63 - __builtin_clzll(w64);
290}
291
292static inline UInt ULong__minIndex ( ULong w64 ) {
293   return __builtin_ctzll(w64);
294}
295
296
297/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
298static void* local_memset ( void *destV, Int c, SizeT sz )
299{
300#  define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
301
302   UInt   c4;
303   UChar* d = destV;
304   UChar  uc = c;
305
306   while ((!IS_4_ALIGNED(d)) && sz >= 1) {
307      d[0] = uc;
308      d++;
309      sz--;
310   }
311   if (sz == 0)
312      return destV;
313   c4 = uc;
314   c4 |= (c4 << 8);
315   c4 |= (c4 << 16);
316   while (sz >= 16) {
317      ((UInt*)d)[0] = c4;
318      ((UInt*)d)[1] = c4;
319      ((UInt*)d)[2] = c4;
320      ((UInt*)d)[3] = c4;
321      d += 16;
322      sz -= 16;
323   }
324   while (sz >= 4) {
325      ((UInt*)d)[0] = c4;
326      d += 4;
327      sz -= 4;
328   }
329   while (sz >= 1) {
330      d[0] = c;
331      d++;
332      sz--;
333   }
334   return destV;
335
336#  undef IS_4_ALIGNED
337}
338
339
340/* A target-independent register allocator.  Requires various
341   functions which it uses to deal abstractly with instructions and
342   registers, since it cannot have any target-specific knowledge.
343
344   Returns a new list of instructions, which, as a result of the
345   behaviour of mapRegs, will be in-place modifications of the
346   original instructions.
347
348   Requires that the incoming code has been generated using
349   vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
350   that range is a checked run-time error.
351
352   Takes an expandable array of pointers to unallocated insns.
353   Returns an expandable array of pointers to allocated insns.
354*/
355HInstrArray* doRegisterAllocation (
356
357   /* Incoming virtual-registerised code. */
358   HInstrArray* instrs_in,
359
360   /* The real-register universe to use.  This contains facts about
361      real registers, one of which is the set of registers available
362      for allocation. */
363   const RRegUniverse* univ,
364
365   /* Return True iff the given insn is a reg-reg move, in which
366      case also return the src and dst regs. */
367   Bool (*isMove) ( const HInstr*, HReg*, HReg* ),
368
369   /* Get info about register usage in this insn. */
370   void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),
371
372   /* Apply a reg-reg mapping to an insn. */
373   void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
374
375   /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
376      real reg to a spill slot byte offset.  The two leading HInstr**
377      args are out parameters, through which the generated insns are
378      returned.  Also (optionally) a 'directReload' function, which
379      attempts to replace a given instruction by one which reads
380      directly from a specified spill slot.  May be NULL, in which
381      case the optimisation is not attempted. */
382   void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
383   void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
384   HInstr* (*directReload) ( HInstr*, HReg, Short ),
385   Int     guest_sizeB,
386
387   /* For debug printing only. */
388   void (*ppInstr) ( const HInstr*, Bool ),
389   void (*ppReg) ( HReg ),
390
391   /* 32/64bit mode */
392   Bool mode64
393)
394{
395#  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
396
397   const Bool eq_spill_opt = True;
398
399   /* Info on vregs and rregs.  Computed once and remains
400      unchanged. */
401   Int     n_vregs;
402   VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
403
404   /* We keep two copies of the real-reg live range info, one sorted
405      by .live_after and the other by .dead_before.  First the
406      unsorted info is created in the _la variant is copied into the
407      _db variant.  Once that's done both of them are sorted.
408      We also need two integer cursors which record the next
409      location in the two arrays to consider. */
410   RRegLR* rreg_lrs_la;
411   RRegLR* rreg_lrs_db;
412   Int     rreg_lrs_size;
413   Int     rreg_lrs_used;
414   Int     rreg_lrs_la_next;
415   Int     rreg_lrs_db_next;
416
417   /* Info on register usage in the incoming instruction array.
418      Computed once and remains unchanged, more or less; updated
419      sometimes by the direct-reload optimisation. */
420   HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
421
422   /* Used when constructing vreg_lrs (for allocating stack
423      slots). */
424   Short ss_busy_until_before[N_SPILL64S];
425
426   /* Used when constructing rreg_lrs. */
427   Int* rreg_live_after;
428   Int* rreg_dead_before;
429
430   /* Running state of the core allocation algorithm. */
431   RRegState* rreg_state;  /* [0 .. n_rregs-1] */
432   Int        n_rregs;
433
434   /* .. and the redundant backward map */
435   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
436      This inplies n_rregs must be <= 32768. */
437   Short*     vreg_state;  /* [0 .. n_vregs-1] */
438
439   /* The vreg -> rreg map constructed and then applied to each
440      instr. */
441   HRegRemap remap;
442
443   /* The output array of instructions. */
444   HInstrArray* instrs_out;
445
446   /* Sanity checks are expensive.  They are only done periodically,
447      not at each insn processed. */
448   Bool do_sanity_check;
449
450   vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
451   vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
452   vassert(0 == (N_SPILL64S % 2));
453
454   /* The live range numbers are signed shorts, and so limiting the
455      number of insns to 15000 comfortably guards against them
456      overflowing 32k. */
457   vassert(instrs_in->arr_used <= 15000);
458
459#  define INVALID_INSTRNO (-2)
460
461#  define EMIT_INSTR(_instr)                  \
462      do {                                    \
463        HInstr* _tmp = (_instr);              \
464        if (DEBUG_REGALLOC) {                 \
465           vex_printf("**  ");                \
466           (*ppInstr)(_tmp, mode64);          \
467           vex_printf("\n\n");                \
468        }                                     \
469        addHInstr ( instrs_out, _tmp );       \
470      } while (0)
471
472#   define PRINT_STATE						   \
473      do {							   \
474         Int z, q;						   \
475         for (z = 0; z < n_rregs; z++) {			   \
476            vex_printf("  rreg_state[%2d] = ", z);		   \
477            (*ppReg)(univ->regs[z]);	       			   \
478            vex_printf("  \t");					   \
479            switch (rreg_state[z].disp) {			   \
480               case Free:    vex_printf("Free\n"); break;	   \
481               case Unavail: vex_printf("Unavail\n"); break;	   \
482               case Bound:   vex_printf("BoundTo "); 		   \
483                             (*ppReg)(rreg_state[z].vreg);	   \
484                             vex_printf("\n"); break;		   \
485            }							   \
486         }							   \
487         vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
488         q = 0;                                                    \
489         for (z = 0; z < n_vregs; z++) {                           \
490            if (vreg_state[z] == INVALID_RREG_NO)                  \
491               continue;                                           \
492            vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
493            q++;                                                   \
494            if (q > 0 && (q % 6) == 0)                             \
495               vex_printf("\n    ");                               \
496         }                                                         \
497         vex_printf("\n");                                         \
498      } while (0)
499
500
501   /* --------- Stage 0: set up output array --------- */
502   /* --------- and allocate/initialise running state. --------- */
503
504   instrs_out = newHInstrArray();
505
506   /* ... and initialise running state. */
507   /* n_rregs is no more than a short name for n_available_real_regs. */
508   n_rregs = univ->allocable;
509   n_vregs = instrs_in->n_vregs;
510
511   /* If this is not so, vreg_state entries will overflow. */
512   vassert(n_vregs < 32767);
513
514   /* If this is not so, the universe we have is nonsensical. */
515   vassert(n_rregs > 0);
516
517   rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
518   vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
519
520   for (Int j = 0; j < n_rregs; j++) {
521      rreg_state[j].has_hlrs      = False;
522      rreg_state[j].disp          = Free;
523      rreg_state[j].vreg          = INVALID_HREG;
524      rreg_state[j].is_spill_cand = False;
525      rreg_state[j].eq_spill_slot = False;
526   }
527
528   for (Int j = 0; j < n_vregs; j++)
529      vreg_state[j] = INVALID_RREG_NO;
530
531
532   /* --------- Stage 1: compute vreg live ranges. --------- */
533   /* --------- Stage 2: compute rreg live ranges. --------- */
534
535   /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
536
537   /* This is relatively simple, because (1) we only seek the complete
538      end-to-end live range of each vreg, and are not interested in
539      any holes in it, and (2) the vregs are conveniently numbered 0
540      .. n_vregs-1, so we can just dump the results in a
541      pre-allocated array. */
542
543   vreg_lrs = NULL;
544   if (n_vregs > 0)
545      vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
546
547   for (Int j = 0; j < n_vregs; j++) {
548      vreg_lrs[j].live_after     = INVALID_INSTRNO;
549      vreg_lrs[j].dead_before    = INVALID_INSTRNO;
550      vreg_lrs[j].spill_offset   = 0;
551      vreg_lrs[j].spill_size     = 0;
552      vreg_lrs[j].reg_class      = HRcINVALID;
553   }
554
555   /* An array to hold the reg-usage info for the incoming
556      instructions. */
557   reg_usage_arr
558      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used-1);
559
560   /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
561
562   /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
563
564   /* This is more complex than Stage 1, because we need to compute
565      exactly all the live ranges of all the allocatable real regs,
566      and we don't know in advance how many there will be. */
567
568   rreg_lrs_used = 0;
569   rreg_lrs_size = 4;
570   rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
571   rreg_lrs_db = NULL; /* we'll create this later */
572
573   /* We'll need to track live range start/end points seperately for
574      each rreg.  Sigh. */
575   vassert(n_rregs > 0);
576   rreg_live_after  = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
577   rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
578
579   for (Int j = 0; j < n_rregs; j++) {
580      rreg_live_after[j] =
581      rreg_dead_before[j] = INVALID_INSTRNO;
582   }
583
584   /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
585
586   /* ------ start of ITERATE OVER INSNS ------ */
587
588   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
589
590      (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
591
592      if (0) {
593         vex_printf("\n%d  stage1: ", ii);
594         (*ppInstr)(instrs_in->arr[ii], mode64);
595         vex_printf("\n");
596         ppHRegUsage(univ, &reg_usage_arr[ii]);
597      }
598
599      /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
600
601      /* for each virtual reg mentioned in the insn ... */
602      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
603
604         HReg vreg = reg_usage_arr[ii].vRegs[j];
605         vassert(hregIsVirtual(vreg));
606
607         Int k = hregIndex(vreg);
608         if (k < 0 || k >= n_vregs) {
609            vex_printf("\n");
610            (*ppInstr)(instrs_in->arr[ii], mode64);
611            vex_printf("\n");
612            vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
613            vpanic("doRegisterAllocation: out-of-range vreg");
614         }
615
616         /* Take the opportunity to note its regclass.  We'll need
617            that when allocating spill slots. */
618         if (vreg_lrs[k].reg_class == HRcINVALID) {
619            /* First mention of this vreg. */
620            vreg_lrs[k].reg_class = hregClass(vreg);
621         } else {
622            /* Seen it before, so check for consistency. */
623            vassert(vreg_lrs[k].reg_class == hregClass(vreg));
624         }
625
626         /* Now consider live ranges. */
627         switch (reg_usage_arr[ii].vMode[j]) {
628            case HRmRead:
629               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
630                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
631                  vpanic("doRegisterAllocation: "
632                         "first event for vreg is Read");
633               }
634               vreg_lrs[k].dead_before = toShort(ii + 1);
635               break;
636            case HRmWrite:
637               if (vreg_lrs[k].live_after == INVALID_INSTRNO)
638                  vreg_lrs[k].live_after = toShort(ii);
639               vreg_lrs[k].dead_before = toShort(ii + 1);
640               break;
641            case HRmModify:
642               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
643                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
644                  vpanic("doRegisterAllocation: "
645                         "first event for vreg is Modify");
646               }
647               vreg_lrs[k].dead_before = toShort(ii + 1);
648               break;
649            default:
650               vpanic("doRegisterAllocation(1)");
651         } /* switch */
652
653      } /* iterate over virtual registers */
654
655      /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
656
657      /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
658
659      /* If this doesn't hold, the following iteration over real registers
660         will fail miserably. */
661      vassert(N_RREGUNIVERSE_REGS == 64);
662
663      const ULong rRead      = reg_usage_arr[ii].rRead;
664      const ULong rWritten   = reg_usage_arr[ii].rWritten;
665      const ULong rMentioned = rRead | rWritten;
666
667      UInt rReg_minIndex;
668      UInt rReg_maxIndex;
669      if (rMentioned == 0) {
670         /* There are no real register uses in this insn.  Set
671            rReg_{min,max}Index so that the following loop doesn't iterate
672            at all, so as to avoid wasting time. */
673         rReg_minIndex = 1;
674         rReg_maxIndex = 0;
675      } else {
676         rReg_minIndex = ULong__minIndex(rMentioned);
677         rReg_maxIndex = ULong__maxIndex(rMentioned);
678         /* Don't bother to look at registers which are not available
679            to the allocator.  We asserted above that n_rregs > 0, so
680            n_rregs-1 is safe. */
681         if (rReg_maxIndex >= n_rregs)
682            rReg_maxIndex = n_rregs-1;
683      }
684
685      /* for each allocator-available real reg mentioned in the insn ... */
686      /* Note.  We are allocating only over the real regs available to
687         the allocator.  Others, eg the stack or baseblock pointers,
688         are unavailable to allocation and so we never visit them.
689         Hence the iteration is cut off at n_rregs-1, since n_rregs ==
690         univ->allocable. */
691      for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
692
693         const ULong jMask = 1ULL << j;
694         if (LIKELY((rMentioned & jMask) == 0))
695            continue;
696
697         const Bool isR = (rRead    & jMask) != 0;
698         const Bool isW = (rWritten & jMask) != 0;
699
700         /* Dummy initialisations of flush_la and flush_db to avoid
701            possible bogus uninit-var warnings from gcc. */
702         Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
703         Bool flush = False;
704
705         if (isW && !isR) {
706            flush_la = rreg_live_after[j];
707            flush_db = rreg_dead_before[j];
708            if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
709               flush = True;
710            rreg_live_after[j]  = ii;
711            rreg_dead_before[j] = ii+1;
712         } else if (!isW && isR) {
713            if (rreg_live_after[j] == INVALID_INSTRNO) {
714               vex_printf("\nOFFENDING RREG = ");
715               (*ppReg)(univ->regs[j]);
716               vex_printf("\n");
717               vex_printf("\nOFFENDING instr = ");
718               (*ppInstr)(instrs_in->arr[ii], mode64);
719               vex_printf("\n");
720               vpanic("doRegisterAllocation: "
721                      "first event for rreg is Read");
722            }
723            rreg_dead_before[j] = ii+1;
724         } else {
725            vassert(isR && isW);
726            if (rreg_live_after[j] == INVALID_INSTRNO) {
727               vex_printf("\nOFFENDING RREG = ");
728               (*ppReg)(univ->regs[j]);
729               vex_printf("\n");
730               vex_printf("\nOFFENDING instr = ");
731               (*ppInstr)(instrs_in->arr[ii], mode64);
732               vex_printf("\n");
733               vpanic("doRegisterAllocation: "
734                      "first event for rreg is Modify");
735            }
736            rreg_dead_before[j] = ii+1;
737         }
738
739         if (flush) {
740            vassert(flush_la != INVALID_INSTRNO);
741            vassert(flush_db != INVALID_INSTRNO);
742            ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
743            if (0)
744               vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
745            rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
746            rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
747            rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
748            rreg_lrs_used++;
749         }
750
751      } /* iterate over rregs in the instr */
752
753      /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
754
755   } /* iterate over insns */
756
757   /* ------ end of ITERATE OVER INSNS ------ */
758
759   /* ------ start of FINALISE RREG LIVE RANGES ------ */
760
761   /* Now finish up any live ranges left over. */
762   for (Int j = 0; j < n_rregs; j++) {
763
764      if (0) {
765         vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
766                                                rreg_dead_before[j]);
767      }
768      vassert( (rreg_live_after[j] == INVALID_INSTRNO
769                && rreg_dead_before[j] == INVALID_INSTRNO)
770              ||
771               (rreg_live_after[j] != INVALID_INSTRNO
772                && rreg_dead_before[j] != INVALID_INSTRNO)
773            );
774
775      if (rreg_live_after[j] == INVALID_INSTRNO)
776         continue;
777
778      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
779      if (0)
780         vex_printf("FLUSH 2 (%d,%d)\n",
781                    rreg_live_after[j], rreg_dead_before[j]);
782      rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
783      rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
784      rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
785      rreg_lrs_used++;
786   }
787
788   /* Compute summary hints for choosing real regs.  If a real reg is
789      involved in a hard live range, record that fact in the fixed
790      part of the running rreg_state.  Later, when offered a choice between
791      rregs, it's better to choose one which is not marked as having
792      any HLRs, since ones with HLRs may need to be spilled around
793      their HLRs.  Correctness of final assignment is unaffected by
794      this mechanism -- it is only an optimisation. */
795
796   for (Int j = 0; j < rreg_lrs_used; j++) {
797      HReg rreg = rreg_lrs_la[j].rreg;
798      vassert(!hregIsVirtual(rreg));
799      /* rreg is involved in a HLR.  Record this info in the array, if
800         there is space. */
801      UInt ix = hregIndex(rreg);
802      vassert(ix < n_rregs);
803      rreg_state[ix].has_hlrs = True;
804   }
805   if (0) {
806      for (Int j = 0; j < n_rregs; j++) {
807         if (!rreg_state[j].has_hlrs)
808            continue;
809         ppReg(univ->regs[j]);
810         vex_printf(" hinted\n");
811      }
812   }
813
814   /* Finally, copy the _la variant into the _db variant and
815      sort both by their respective fields. */
816   rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
817   for (Int j = 0; j < rreg_lrs_used; j++)
818      rreg_lrs_db[j] = rreg_lrs_la[j];
819
820   sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
821   sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
822
823   /* And set up the cursors. */
824   rreg_lrs_la_next = 0;
825   rreg_lrs_db_next = 0;
826
827   for (Int j = 1; j < rreg_lrs_used; j++) {
828      vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
829      vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
830   }
831
832   /* ------ end of FINALISE RREG LIVE RANGES ------ */
833
834   if (DEBUG_REGALLOC) {
835      for (Int j = 0; j < n_vregs; j++) {
836         vex_printf("vreg %d:  la = %d,  db = %d\n",
837                    j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
838      }
839   }
840
841   if (DEBUG_REGALLOC) {
842      vex_printf("RRegLRs by LA:\n");
843      for (Int j = 0; j < rreg_lrs_used; j++) {
844         vex_printf("  ");
845         (*ppReg)(rreg_lrs_la[j].rreg);
846         vex_printf("      la = %d,  db = %d\n",
847                    rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
848      }
849      vex_printf("RRegLRs by DB:\n");
850      for (Int j = 0; j < rreg_lrs_used; j++) {
851         vex_printf("  ");
852         (*ppReg)(rreg_lrs_db[j].rreg);
853         vex_printf("      la = %d,  db = %d\n",
854                    rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
855      }
856   }
857
858   /* --------- Stage 3: allocate spill slots. --------- */
859
860   /* Each spill slot is 8 bytes long.  For vregs which take more than
861      64 bits to spill (classes Flt64 and Vec128), we have to allocate
862      two consecutive spill slots.  For 256 bit registers (class
863      Vec256), we have to allocate four consecutive spill slots.
864
865      For Vec128-class on PowerPC, the spill slot's actual address
866      must be 16-byte aligned.  Since the spill slot's address is
867      computed as an offset from the guest state pointer, and since
868      the user of the generated code must set that pointer to a
869      32-aligned value, we have the residual obligation here of
870      choosing a 16-aligned spill slot offset for Vec128-class values.
871      Since each spill slot is 8 bytes long, that means for
872      Vec128-class values we must allocated a spill slot number which
873      is zero mod 2.
874
875      Similarly, for Vec256 class on amd64, find a spill slot number
876      which is zero mod 4.  This guarantees it will be 32 byte
877      aligned, which isn't actually necessary on amd64 (we use movUpd
878      etc to spill), but seems like good practice.
879
880      Do a rank-based allocation of vregs to spill slot numbers.  We
881      put as few values as possible in spill slots, but nevertheless
882      need to have a spill slot available for all vregs, just in case.
883   */
884   /* Int max_ss_no = -1; */
885
886   local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));
887
888   for (Int j = 0; j < n_vregs; j++) {
889
890      /* True iff this vreg is unused.  In which case we also expect
891         that the reg_class field for it has not been set.  */
892      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
893         vassert(vreg_lrs[j].reg_class == HRcINVALID);
894         continue;
895      }
896
897      /* The spill slots are 64 bits in size.  As per the comment on
898         definition of HRegClass in host_generic_regs.h, that means,
899         to spill a vreg of class Flt64 or Vec128, we'll need to find
900         two adjacent spill slots to use.  For Vec256, we'll need to
901         find four adjacent slots to use.  Note, this logic needs to
902         kept in sync with the size info on the definition of
903         HRegClass. */
904      Int ss_no = -1;
905      switch (vreg_lrs[j].reg_class) {
906
907         case HRcVec128: case HRcFlt64:
908            /* Find two adjacent free slots in which between them
909               provide up to 128 bits in which to spill the vreg.
910               Since we are trying to find an even:odd pair, move
911               along in steps of 2 (slots). */
912            for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
913               if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
914                   && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
915                  break;
916            if (ss_no >= N_SPILL64S-1) {
917               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
918                      "Increase and recompile.");
919            }
920            ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
921            ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
922            break;
923
924         default:
925            /* The ordinary case -- just find a single spill slot. */
926            /* Find the lowest-numbered spill slot which is available
927               at the start point of this interval, and assign the
928               interval to it. */
929            for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
930               if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
931                  break;
932            if (ss_no == N_SPILL64S) {
933               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
934                      "Increase and recompile.");
935            }
936            ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
937            break;
938
939      } /* switch (vreg_lrs[j].reg_class) */
940
941      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
942         layout: the guest state, then two equal sized areas following
943         it for two sets of shadow state, and then the spill area. */
944      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);
945
946      /* Independent check that we've made a sane choice of slot */
947      sanity_check_spill_offset( &vreg_lrs[j] );
948      /* if (j > max_ss_no) */
949      /*    max_ss_no = j; */
950   }
951
952   if (0) {
953      vex_printf("\n\n");
954      for (Int j = 0; j < n_vregs; j++)
955         vex_printf("vreg %d    --> spill offset %d\n",
956                    j, vreg_lrs[j].spill_offset);
957   }
958
959   /* --------- Stage 4: establish rreg preferences --------- */
960
961   /* It may be advantageous to allocating certain vregs to specific
962      rregs, as a way of avoiding reg-reg moves later.  Here we
963      establish which, if any, rreg each vreg would prefer to be in.
964      Note that this constrains the allocator -- ideally we end up
965      with as few as possible vregs expressing a preference.
966
967      This is an optimisation: if the .preferred_rreg field is never
968      set to anything different from INVALID_HREG, the allocator still
969      works. */
970
971   /* 30 Dec 04: removed this mechanism as it does not seem to
972      help. */
973
974   /* --------- Stage 5: process instructions --------- */
975
976   /* This is the main loop of the allocator.  First, we need to
977      correctly set up our running state, which tracks the status of
978      each real register. */
979
980   /* ------ BEGIN: Process each insn in turn. ------ */
981
982   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
983
984      if (DEBUG_REGALLOC) {
985         vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
986         vex_printf("---- ");
987         (*ppInstr)(instrs_in->arr[ii], mode64);
988         vex_printf("\n\nInitial state:\n");
989         PRINT_STATE;
990         vex_printf("\n");
991      }
992
993      /* ------------ Sanity checks ------------ */
994
995      /* Sanity checks are expensive.  So they are done only once
996         every 13 instructions, and just before the last
997         instruction. */
998      do_sanity_check
999         = toBool(
1000              False /* Set to True for sanity checking of all insns. */
1001              || ii == instrs_in->arr_used-1
1002              || (ii > 0 && (ii % 13) == 0)
1003           );
1004
1005      if (do_sanity_check) {
1006
1007         /* Sanity check 1: all rregs with a hard live range crossing
1008            this insn must be marked as unavailable in the running
1009            state. */
1010         for (Int j = 0; j < rreg_lrs_used; j++) {
1011            if (rreg_lrs_la[j].live_after < ii
1012                && ii < rreg_lrs_la[j].dead_before) {
1013               /* ii is the middle of a hard live range for some real
1014                  reg.  Check it's marked as such in the running
1015                  state. */
1016               HReg reg = rreg_lrs_la[j].rreg;
1017
1018               if (0) {
1019                  vex_printf("considering la %d .. db %d   reg = ",
1020                             rreg_lrs_la[j].live_after,
1021                             rreg_lrs_la[j].dead_before);
1022                  (*ppReg)(reg);
1023                  vex_printf("\n");
1024               }
1025
1026               /* assert that this rreg is marked as unavailable */
1027               vassert(!hregIsVirtual(reg));
1028               vassert(rreg_state[hregIndex(reg)].disp == Unavail);
1029            }
1030         }
1031
1032         /* Sanity check 2: conversely, all rregs marked as
1033            unavailable in the running rreg_state must have a
1034            corresponding hard live range entry in the rreg_lrs
1035            array. */
1036         for (Int j = 0; j < n_rregs; j++) {
1037            vassert(rreg_state[j].disp == Bound
1038                    || rreg_state[j].disp == Free
1039                    || rreg_state[j].disp == Unavail);
1040            if (rreg_state[j].disp != Unavail)
1041               continue;
1042            Int k;
1043            for (k = 0; k < rreg_lrs_used; k++) {
1044               HReg reg = rreg_lrs_la[k].rreg;
1045               vassert(!hregIsVirtual(reg));
1046               if (hregIndex(reg) == j
1047                   && rreg_lrs_la[k].live_after < ii
1048                   && ii < rreg_lrs_la[k].dead_before)
1049                  break;
1050            }
1051            /* If this vassertion fails, we couldn't find a
1052               corresponding HLR. */
1053            vassert(k < rreg_lrs_used);
1054         }
1055
1056         /* Sanity check 3: all vreg-rreg bindings must bind registers
1057            of the same class. */
1058         for (Int j = 0; j < n_rregs; j++) {
1059            if (rreg_state[j].disp != Bound) {
1060               vassert(rreg_state[j].eq_spill_slot == False);
1061               continue;
1062            }
1063            vassert(hregClass(univ->regs[j])
1064                    == hregClass(rreg_state[j].vreg));
1065            vassert( hregIsVirtual(rreg_state[j].vreg));
1066         }
1067
1068         /* Sanity check 4: the vreg_state and rreg_state
1069            mutually-redundant mappings are consistent.  If
1070            rreg_state[j].vreg points at some vreg_state entry then
1071            that vreg_state entry should point back at
1072            rreg_state[j]. */
1073         for (Int j = 0; j < n_rregs; j++) {
1074            if (rreg_state[j].disp != Bound)
1075               continue;
1076            Int k = hregIndex(rreg_state[j].vreg);
1077            vassert(IS_VALID_VREGNO(k));
1078            vassert(vreg_state[k] == j);
1079         }
1080         for (Int j = 0; j < n_vregs; j++) {
1081            Int k = vreg_state[j];
1082            if (k == INVALID_RREG_NO)
1083               continue;
1084            vassert(IS_VALID_RREGNO(k));
1085            vassert(rreg_state[k].disp == Bound);
1086            vassert(hregIndex(rreg_state[k].vreg) == j);
1087         }
1088
1089      } /* if (do_sanity_check) */
1090
1091      /* ------------ end of Sanity checks ------------ */
1092
1093      /* Do various optimisations pertaining to register coalescing
1094         and preferencing:
1095            MOV  v <-> v   coalescing (done here).
1096            MOV  v <-> r   coalescing (not yet, if ever)
1097      */
1098      /* If doing a reg-reg move between two vregs, and the src's live
1099         range ends here and the dst's live range starts here, bind
1100         the dst to the src's rreg, and that's all. */
1101      HReg vregS = INVALID_HREG;
1102      HReg vregD = INVALID_HREG;
1103      if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1104         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1105         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1106         /* Check that *isMove is not telling us a bunch of lies ... */
1107         vassert(hregClass(vregS) == hregClass(vregD));
1108         Int k = hregIndex(vregS);
1109         Int m = hregIndex(vregD);
1110         vassert(IS_VALID_VREGNO(k));
1111         vassert(IS_VALID_VREGNO(m));
1112         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1113         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1114         if (DEBUG_REGALLOC) {
1115         vex_printf("COALESCE ");
1116            (*ppReg)(vregS);
1117            vex_printf(" -> ");
1118            (*ppReg)(vregD);
1119            vex_printf("\n\n");
1120         }
1121         /* Find the state entry for vregS. */
1122         Int n = vreg_state[k]; /* k is the index of vregS */
1123         if (n == INVALID_RREG_NO) {
1124            /* vregS is not currently in a real register.  So we can't
1125               do the coalescing.  Give up. */
1126            goto cannot_coalesce;
1127         }
1128         vassert(IS_VALID_RREGNO(n));
1129
1130         /* Finally, we can do the coalescing.  It's trivial -- merely
1131            claim vregS's register for vregD. */
1132         rreg_state[n].vreg = vregD;
1133         vassert(IS_VALID_VREGNO(hregIndex(vregD)));
1134         vassert(IS_VALID_VREGNO(hregIndex(vregS)));
1135         vreg_state[hregIndex(vregD)] = toShort(n);
1136         vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
1137
1138         /* This rreg has become associated with a different vreg and
1139            hence with a different spill slot.  Play safe. */
1140         rreg_state[n].eq_spill_slot = False;
1141
1142         /* Move on to the next insn.  We skip the post-insn stuff for
1143            fixed registers, since this move should not interact with
1144            them in any way. */
1145         continue;
1146      }
1147     cannot_coalesce:
1148
1149      /* ------ Free up rregs bound to dead vregs ------ */
1150
1151      /* Look for vregs whose live range has just ended, and
1152	 mark the associated rreg as free. */
1153
1154      for (Int j = 0; j < n_rregs; j++) {
1155         if (rreg_state[j].disp != Bound)
1156            continue;
1157         UInt vregno = hregIndex(rreg_state[j].vreg);
1158         vassert(IS_VALID_VREGNO(vregno));
1159         if (vreg_lrs[vregno].dead_before <= ii) {
1160            rreg_state[j].disp = Free;
1161            rreg_state[j].eq_spill_slot = False;
1162            Int m = hregIndex(rreg_state[j].vreg);
1163            vassert(IS_VALID_VREGNO(m));
1164            vreg_state[m] = INVALID_RREG_NO;
1165            if (DEBUG_REGALLOC) {
1166               vex_printf("free up ");
1167               (*ppReg)(univ->regs[j]);
1168               vex_printf("\n");
1169            }
1170         }
1171      }
1172
1173      /* ------ Pre-instruction actions for fixed rreg uses ------ */
1174
1175      /* Now we have to deal with rregs which are about to be made
1176         live by this instruction -- in other words, are entering into
1177         one of their live ranges.  If any such rreg holds a vreg, we
1178         will have to free up the rreg.  The simplest solution which
1179         is correct is to spill the rreg.
1180
1181         Note we could do better:
1182         * Could move it into some other free rreg, if one is available
1183
1184         Do this efficiently, by incrementally stepping along an array
1185         of rreg HLRs that are known to be sorted by start point
1186         (their .live_after field).
1187      */
1188      while (True) {
1189         vassert(rreg_lrs_la_next >= 0);
1190         vassert(rreg_lrs_la_next <= rreg_lrs_used);
1191         if (rreg_lrs_la_next == rreg_lrs_used)
1192            break; /* no more real reg live ranges to consider */
1193         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1194            break; /* next live range does not yet start */
1195         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1196         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1197            Find the associated rreg_state entry. */
1198         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1199            Real register live ranges are guaranteed to be well-formed
1200            in that they start with a write to the register -- Stage 2
1201            rejects any code not satisfying this.  So the correct
1202            question to ask is whether
1203            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1204            whether the reg becomes live after this insn -- rather
1205            than before it. */
1206         if (DEBUG_REGALLOC) {
1207            vex_printf("need to free up rreg: ");
1208            (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1209            vex_printf("\n\n");
1210         }
1211         Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
1212
1213         /* If this fails, we don't have an entry for this rreg.
1214            Which we should. */
1215         vassert(IS_VALID_RREGNO(k));
1216         Int m = hregIndex(rreg_state[k].vreg);
1217         if (rreg_state[k].disp == Bound) {
1218            /* Yes, there is an associated vreg.  Spill it if it's
1219               still live. */
1220            vassert(IS_VALID_VREGNO(m));
1221            vreg_state[m] = INVALID_RREG_NO;
1222            if (vreg_lrs[m].dead_before > ii) {
1223               vassert(vreg_lrs[m].reg_class != HRcINVALID);
1224               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1225                  HInstr* spill1 = NULL;
1226                  HInstr* spill2 = NULL;
1227                  (*genSpill)( &spill1, &spill2, univ->regs[k],
1228                               vreg_lrs[m].spill_offset, mode64 );
1229                  vassert(spill1 || spill2); /* can't both be NULL */
1230                  if (spill1)
1231                     EMIT_INSTR(spill1);
1232                  if (spill2)
1233                     EMIT_INSTR(spill2);
1234               }
1235               rreg_state[k].eq_spill_slot = True;
1236            }
1237         }
1238         rreg_state[k].disp = Unavail;
1239         rreg_state[k].vreg = INVALID_HREG;
1240         rreg_state[k].eq_spill_slot = False;
1241
1242         /* check for further rregs entering HLRs at this point */
1243         rreg_lrs_la_next++;
1244      }
1245
1246      if (DEBUG_REGALLOC) {
1247         vex_printf("After pre-insn actions for fixed regs:\n");
1248         PRINT_STATE;
1249         vex_printf("\n");
1250      }
1251
1252      /* ------ Deal with the current instruction. ------ */
1253
1254      /* Finally we can begin the processing of this instruction
1255         itself.  The aim is to free up enough rregs for this insn.
1256         This may generate spill stores since we may have to evict
1257         some vregs currently in rregs.  Also generates spill loads.
1258         We also build up the final vreg->rreg mapping to be applied
1259         to the insn. */
1260
1261      initHRegRemap(&remap);
1262
1263      /* ------------ BEGIN directReload optimisation ----------- */
1264
1265      /* If the instruction reads exactly one vreg which is currently
1266         in a spill slot, and this is last use of that vreg, see if we
1267         can convert the instruction into one that reads directly from
1268         the spill slot.  This is clearly only possible for x86 and
1269         amd64 targets, since ppc and arm are load-store
1270         architectures.  If successful, replace instrs_in->arr[ii]
1271         with this new instruction, and recompute its reg usage, so
1272         that the change is invisible to the standard-case handling
1273         that follows. */
1274
1275      if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
1276         Bool  debug_direct_reload = False;
1277         HReg  cand     = INVALID_HREG;
1278         Bool  nreads   = 0;
1279         Short spilloff = 0;
1280
1281         for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1282
1283            HReg vreg = reg_usage_arr[ii].vRegs[j];
1284            vassert(hregIsVirtual(vreg));
1285
1286            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1287               nreads++;
1288               Int m = hregIndex(vreg);
1289               vassert(IS_VALID_VREGNO(m));
1290               Int k = vreg_state[m];
1291               if (!IS_VALID_RREGNO(k)) {
1292                  /* ok, it is spilled.  Now, is this its last use? */
1293                  vassert(vreg_lrs[m].dead_before >= ii+1);
1294                  if (vreg_lrs[m].dead_before == ii+1
1295                      && hregIsInvalid(cand)) {
1296                     spilloff = vreg_lrs[m].spill_offset;
1297                     cand = vreg;
1298                  }
1299               }
1300            }
1301         }
1302
1303         if (nreads == 1 && ! hregIsInvalid(cand)) {
1304            HInstr* reloaded;
1305            if (reg_usage_arr[ii].n_vRegs == 2)
1306               vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
1307                                  reg_usage_arr[ii].vRegs[1]));
1308
1309            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1310            if (debug_direct_reload && !reloaded) {
1311               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1312               ppInstr(instrs_in->arr[ii], mode64);
1313            }
1314            if (reloaded) {
1315               /* Update info about the insn, so it looks as if it had
1316                  been in this form all along. */
1317               instrs_in->arr[ii] = reloaded;
1318               (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
1319               if (debug_direct_reload && !reloaded) {
1320                  vex_printf("  -->  ");
1321                  ppInstr(reloaded, mode64);
1322               }
1323            }
1324
1325            if (debug_direct_reload && !reloaded)
1326               vex_printf("\n");
1327         }
1328
1329      }
1330
1331      /* ------------ END directReload optimisation ------------ */
1332
1333      /* for each virtual reg mentioned in the insn ... */
1334      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1335
1336         HReg vreg = reg_usage_arr[ii].vRegs[j];
1337         vassert(hregIsVirtual(vreg));
1338
1339         if (0) {
1340            vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1341         }
1342
1343         /* Now we're trying to find a rreg for "vreg".  First of all,
1344            if it already has an rreg assigned, we don't need to do
1345            anything more.  Inspect the current state to find out. */
1346         Int m = hregIndex(vreg);
1347         vassert(IS_VALID_VREGNO(m));
1348         Int n = vreg_state[m];
1349         if (IS_VALID_RREGNO(n)) {
1350            vassert(rreg_state[n].disp == Bound);
1351            addToHRegRemap(&remap, vreg, univ->regs[n]);
1352            /* If this rreg is written or modified, mark it as different
1353               from any spill slot value. */
1354            if (reg_usage_arr[ii].vMode[j] != HRmRead)
1355               rreg_state[n].eq_spill_slot = False;
1356            continue;
1357         } else {
1358            vassert(n == INVALID_RREG_NO);
1359         }
1360
1361         /* No luck.  The next thing to do is see if there is a
1362            currently free rreg available, of the correct class.  If
1363            so, bag it.  NOTE, we could improve this by selecting an
1364            rreg for which the next live-range event is as far ahead
1365            as possible. */
1366         Int k_suboptimal = -1;
1367         Int k;
1368         for (k = 0; k < n_rregs; k++) {
1369            if (rreg_state[k].disp != Free
1370                || hregClass(univ->regs[k]) != hregClass(vreg))
1371               continue;
1372            if (rreg_state[k].has_hlrs) {
1373               /* Well, at least we can use k_suboptimal if we really
1374                  have to.  Keep on looking for a better candidate. */
1375               k_suboptimal = k;
1376            } else {
1377               /* Found a preferable reg.  Use it. */
1378               k_suboptimal = -1;
1379               break;
1380            }
1381         }
1382         if (k_suboptimal >= 0)
1383            k = k_suboptimal;
1384
1385         if (k < n_rregs) {
1386            rreg_state[k].disp = Bound;
1387            rreg_state[k].vreg = vreg;
1388            Int p = hregIndex(vreg);
1389            vassert(IS_VALID_VREGNO(p));
1390            vreg_state[p] = toShort(k);
1391            addToHRegRemap(&remap, vreg, univ->regs[k]);
1392            /* Generate a reload if needed.  This only creates needed
1393               reloads because the live range builder for vregs will
1394               guarantee that the first event for a vreg is a write.
1395               Hence, if this reference is not a write, it cannot be
1396               the first reference for this vreg, and so a reload is
1397               indeed needed. */
1398            if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1399               vassert(vreg_lrs[p].reg_class != HRcINVALID);
1400               HInstr* reload1 = NULL;
1401               HInstr* reload2 = NULL;
1402               (*genReload)( &reload1, &reload2, univ->regs[k],
1403                             vreg_lrs[p].spill_offset, mode64 );
1404               vassert(reload1 || reload2); /* can't both be NULL */
1405               if (reload1)
1406                  EMIT_INSTR(reload1);
1407               if (reload2)
1408                  EMIT_INSTR(reload2);
1409               /* This rreg is read or modified by the instruction.
1410                  If it's merely read we can claim it now equals the
1411                  spill slot, but not so if it is modified. */
1412               if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1413                  rreg_state[k].eq_spill_slot = True;
1414               } else {
1415                  vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1416                  rreg_state[k].eq_spill_slot = False;
1417               }
1418            } else {
1419               rreg_state[k].eq_spill_slot = False;
1420            }
1421
1422            continue;
1423         }
1424
1425         /* Well, now we have no option but to spill a vreg.  It's
1426            important to make a good choice of vreg to spill, and of
1427            course we need to be careful not to spill a vreg which is
1428            needed by this insn. */
1429
1430         /* First, mark in the rreg_state, those rregs which are not spill
1431            candidates, due to holding a vreg mentioned by this
1432            instruction.  Or being of the wrong class. */
1433         for (k = 0; k < n_rregs; k++) {
1434            rreg_state[k].is_spill_cand = False;
1435            if (rreg_state[k].disp != Bound)
1436               continue;
1437            if (hregClass(univ->regs[k]) != hregClass(vreg))
1438               continue;
1439            rreg_state[k].is_spill_cand = True;
1440            /* Note, the following loop visits only the virtual regs
1441               mentioned by the instruction. */
1442            for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
1443               if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
1444                  rreg_state[k].is_spill_cand = False;
1445                  break;
1446               }
1447            }
1448         }
1449
1450         /* We can choose to spill any rreg satisfying
1451            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
1452            the next use of its associated vreg is as far ahead as
1453            possible, in the hope that this will minimise the number
1454            of consequent reloads required. */
1455         Int spillee
1456            = findMostDistantlyMentionedVReg (
1457                 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs );
1458
1459         if (spillee == -1) {
1460            /* Hmmmmm.  There don't appear to be any spill candidates.
1461               We're hosed. */
1462            vex_printf("reg_alloc: can't find a register in class: ");
1463            ppHRegClass(hregClass(vreg));
1464            vex_printf("\n");
1465            vpanic("reg_alloc: can't create a free register.");
1466         }
1467
1468         /* Right.  So we're going to spill rreg_state[spillee]. */
1469         vassert(IS_VALID_RREGNO(spillee));
1470         vassert(rreg_state[spillee].disp == Bound);
1471         /* check it's the right class */
1472         vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
1473         /* check we're not ejecting the vreg for which we are trying
1474            to free up a register. */
1475         vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
1476
1477         m = hregIndex(rreg_state[spillee].vreg);
1478         vassert(IS_VALID_VREGNO(m));
1479
1480         /* So here's the spill store.  Assert that we're spilling a
1481            live vreg. */
1482         vassert(vreg_lrs[m].dead_before > ii);
1483         vassert(vreg_lrs[m].reg_class != HRcINVALID);
1484         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1485            HInstr* spill1 = NULL;
1486            HInstr* spill2 = NULL;
1487            (*genSpill)( &spill1, &spill2, univ->regs[spillee],
1488                         vreg_lrs[m].spill_offset, mode64 );
1489            vassert(spill1 || spill2); /* can't both be NULL */
1490            if (spill1)
1491               EMIT_INSTR(spill1);
1492            if (spill2)
1493               EMIT_INSTR(spill2);
1494         }
1495
1496         /* Update the rreg_state to reflect the new assignment for this
1497            rreg. */
1498         rreg_state[spillee].vreg = vreg;
1499         vreg_state[m] = INVALID_RREG_NO;
1500
1501         rreg_state[spillee].eq_spill_slot = False; /* be safe */
1502
1503         m = hregIndex(vreg);
1504         vassert(IS_VALID_VREGNO(m));
1505         vreg_state[m] = toShort(spillee);
1506
1507         /* Now, if this vreg is being read or modified (as opposed to
1508            written), we have to generate a reload for it. */
1509         if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1510            vassert(vreg_lrs[m].reg_class != HRcINVALID);
1511            HInstr* reload1 = NULL;
1512            HInstr* reload2 = NULL;
1513            (*genReload)( &reload1, &reload2, univ->regs[spillee],
1514                          vreg_lrs[m].spill_offset, mode64 );
1515            vassert(reload1 || reload2); /* can't both be NULL */
1516            if (reload1)
1517               EMIT_INSTR(reload1);
1518            if (reload2)
1519               EMIT_INSTR(reload2);
1520            /* This rreg is read or modified by the instruction.
1521               If it's merely read we can claim it now equals the
1522               spill slot, but not so if it is modified. */
1523            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1524               rreg_state[spillee].eq_spill_slot = True;
1525            } else {
1526               vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1527               rreg_state[spillee].eq_spill_slot = False;
1528            }
1529         }
1530
1531         /* So after much twisting and turning, we have vreg mapped to
1532            rreg_state[spillee].rreg.  Note that in the map. */
1533         addToHRegRemap(&remap, vreg, univ->regs[spillee]);
1534
1535      } /* iterate over virtual registers in this instruction. */
1536
1537      /* We've finished clowning around with registers in this instruction.
1538         Three results:
1539         - the running rreg_state[] has been updated
1540         - a suitable vreg->rreg mapping for this instruction has been
1541           constructed
1542         - spill and reload instructions may have been emitted.
1543
1544        The final step is to apply the mapping to the instruction,
1545        and emit that.
1546      */
1547
1548      /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1549      (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1550      EMIT_INSTR( instrs_in->arr[ii] );
1551
1552      if (DEBUG_REGALLOC) {
1553         vex_printf("After dealing with current insn:\n");
1554         PRINT_STATE;
1555         vex_printf("\n");
1556      }
1557
1558      /* ------ Post-instruction actions for fixed rreg uses ------ */
1559
1560      /* Now we need to check for rregs exiting fixed live ranges
1561         after this instruction, and if so mark them as free. */
1562      while (True) {
1563         vassert(rreg_lrs_db_next >= 0);
1564         vassert(rreg_lrs_db_next <= rreg_lrs_used);
1565         if (rreg_lrs_db_next == rreg_lrs_used)
1566            break; /* no more real reg live ranges to consider */
1567         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1568            break; /* next live range does not yet start */
1569         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1570         /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
1571            range.  Mark it as such in the main rreg_state array. */
1572         HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
1573         vassert(!hregIsVirtual(reg));
1574         Int k = hregIndex(reg);
1575         vassert(IS_VALID_RREGNO(k));
1576         vassert(rreg_state[k].disp == Unavail);
1577         rreg_state[k].disp = Free;
1578         rreg_state[k].vreg = INVALID_HREG;
1579         rreg_state[k].eq_spill_slot = False;
1580
1581         /* check for further rregs leaving HLRs at this point */
1582         rreg_lrs_db_next++;
1583      }
1584
1585      if (DEBUG_REGALLOC) {
1586         vex_printf("After post-insn actions for fixed regs:\n");
1587         PRINT_STATE;
1588         vex_printf("\n");
1589      }
1590
1591   } /* iterate over insns */
1592
1593   /* ------ END: Process each insn in turn. ------ */
1594
1595   /* free(rreg_state); */
1596   /* free(rreg_lrs); */
1597   /* if (vreg_lrs) free(vreg_lrs); */
1598
1599   /* Paranoia */
1600   vassert(rreg_lrs_la_next == rreg_lrs_used);
1601   vassert(rreg_lrs_db_next == rreg_lrs_used);
1602
1603   return instrs_out;
1604
1605#  undef INVALID_INSTRNO
1606#  undef EMIT_INSTR
1607#  undef PRINT_STATE
1608}
1609
1610
1611
1612/*---------------------------------------------------------------*/
1613/*---                                       host_reg_alloc2.c ---*/
1614/*---------------------------------------------------------------*/
1615