
/*---------------------------------------------------------------*/
/*--- begin                                 host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"

#include "main_util.h"
#include "host_generic_regs.h"

/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0


/* TODO 27 Oct 04:

   Better consistency checking from what isMove tells us.

   We can possibly do V-V coalescing even when the src is spilled,
   providing we can arrange for the dst to have the same spill slot.

   Note that state[].hreg is the same as the available real regs.

   Generally rationalise data structures.  */


/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Short spill_offset;
      Short spill_size;
      /* What kind of register this is. */
      HRegClass reg_class;
   }
   VRegLR;
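
/* Worked example of the encoding above (illustrative only): a vreg
   that is first written by insn 3 and last read by insn 7 gets
   live_after == 3 and dead_before == 8; that is, it must be kept
   somewhere from just after insn 3 until just before insn 8.  A vreg
   which is never mentioned keeps both fields at the invalid
   instruction number. */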


/* Records information on real-register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;


/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions. */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Which rreg is this for? */
      HReg rreg;
      /* Is this involved in any HLRs?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;
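
/* Illustrative note on the .disp lifecycle (no new machinery): an
   entry starts Free, becomes Bound when some vreg is assigned to it
   (recorded in .vreg), reverts to Free when that vreg's live range
   ends, and is Unavail for the duration of a hard (real-register)
   live range.  A Bound rreg whose hard live range is about to begin
   has its vreg spilled first; see the pre-instruction actions in
   doRegisterAllocation below. */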


/* The allocator also maintains a redundant array of indexes
   (vreg_state) from vreg numbers back to entries in rreg_state.  It
   is redundant because iff vreg_state[i] == j then
   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
   point at each other.  The purpose of this is to speed up activities
   which involve looking for a particular vreg: there is no need to
   scan the rreg_state looking for it, just index directly into
   vreg_state.  The FAQ "does this vreg already have an associated
   rreg" is the main beneficiary.

   To indicate, in vreg_state[i], that a given vreg is not currently
   associated with any rreg, that entry can be set to INVALID_RREG_NO.

   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
*/
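
/* Example of the invariant (illustrative): if vreg 5 currently lives
   in the rreg described by rreg_state[3], then vreg_state[5] == 3,
   rreg_state[3].disp == Bound, and hregNumber(rreg_state[3].vreg)
   == 5.  Sanity check 4 in doRegisterAllocation verifies exactly
   this, in both directions. */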

#define INVALID_RREG_NO ((Short)(-1))

#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)


/* Does this instruction mention a particular reg? */
static Bool instrMentionsReg (
   void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
   HInstr* instr,
   HReg r,
   Bool mode64
)
{
   Int       i;
   HRegUsage reg_usage;
   (*getRegUsage)(&reg_usage, instr, mode64);
   for (i = 0; i < reg_usage.n_used; i++)
      if (sameHReg(reg_usage.hreg[i], r))
         return True;
   return False;
}


/* Search forward from some given point in the incoming instruction
   sequence.  Point is to select a virtual register to spill, by
   finding the vreg which is mentioned as far ahead as possible, in
   the hope that this will minimise the number of consequent reloads.

   Only do the search for vregs which are Bound in the running state,
   and for which the .is_spill_cand field is set.  This allows the
   caller to arbitrarily restrict the set of spill candidates to be
   considered.

   Returns an index into the state array indicating the (v,r) pair to
   spill, or -1 if none was found.  */
static
Int findMostDistantlyMentionedVReg (
   void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
   HInstrArray* instrs_in,
   Int          search_from_instr,
   RRegState*   state,
   Int          n_state,
   Bool         mode64
)
{
   Int k, m;
   Int furthest_k = -1;
   Int furthest   = -1;
   vassert(search_from_instr >= 0);
   for (k = 0; k < n_state; k++) {
      if (!state[k].is_spill_cand)
         continue;
      vassert(state[k].disp == Bound);
      for (m = search_from_instr; m < instrs_in->arr_used; m++) {
         if (instrMentionsReg(getRegUsage,
                              instrs_in->arr[m], state[k].vreg, mode64))
            break;
      }
      if (m > furthest) {
         furthest   = m;
         furthest_k = k;
      }
   }
   return furthest_k;
}
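
/* Illustrative example: if rreg_state[2] is a candidate holding a
   vreg next mentioned at insn 30, and rreg_state[5] is a candidate
   holding a vreg next mentioned at insn 90, the function returns 5,
   since evicting the vreg with the more distant next use should
   trigger fewer reloads. */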


/* Check that this vreg has been assigned a sane spill offset. */
static inline void sanity_check_spill_offset ( VRegLR* vreg )
{
   switch (vreg->reg_class) {
      case HRcVec128: case HRcFlt64:
         vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
      default:
         vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
   }
}


/* Double the size of the real-reg live-range array, if needed. */
static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
{
   Int     k;
   RRegLR* arr2;
   if (used < *size) return;
   if (0)
      vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
   vassert(used == *size);
   arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
   for (k = 0; k < *size; k++)
      arr2[k] = (*info)[k];
   *size *= 2;
   *info = arr2;
}


/* Sort an array of RRegLR entries by either the .live_after or
   .dead_before fields.  This is performance-critical. */
static void sortRRLRarray ( RRegLR* arr,
                            Int size, Bool by_live_after )
{
   Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
                       9841, 29524, 88573, 265720,
                       797161, 2391484 };
   Int    lo = 0;
   Int    hi = size-1;
   Int    i, j, h, bigN, hp;
   RRegLR v;

   vassert(size >= 0);
   if (size == 0)
      return;

   bigN = hi - lo + 1; if (bigN < 2) return;
   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;

   if (by_live_after) {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].live_after > v.live_after) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   } else {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].dead_before > v.dead_before) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   }
}
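
/* The increments above form the sequence h(n+1) = 3*h(n) + 1
   (1, 4, 13, 40, ...), a standard Shell sort gap sequence; the array
   is sorted in place into ascending order of the chosen field. */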


/* A target-independent register allocator.  Requires various
   functions which it uses to deal abstractly with instructions and
   registers, since it cannot have any target-specific knowledge.

   Returns a new list of instructions, which, as a result of the
   behaviour of mapRegs, will be in-place modifications of the
   original instructions.

   Requires that the incoming code has been generated using
   vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
   that range is a checked run-time error.

   Takes an expandable array of pointers to unallocated insns.
   Returns an expandable array of pointers to allocated insns.
*/
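
/* Minimal usage sketch (disabled; illustrative only).  The hook
   names ending in _FOO are hypothetical placeholders for whatever a
   particular backend provides; the guest_sizeB of 1024 is likewise
   just an example value. */
#if 0
static HInstrArray* example_run_regalloc ( HInstrArray* vcode, Bool mode64 )
{
   HReg regs[64];
   Int  n_regs = 0;
   getAllocableRegs_FOO( &n_regs, &regs[0] );   /* hypothetical hook */
   return doRegisterAllocation(
             vcode, regs, n_regs,
             isMove_FOO, getRegUsage_FOO, mapRegs_FOO,
             genSpill_FOO, genReload_FOO,
             NULL /* no directReload on this target */,
             1024 /* guest_sizeB; must be a multiple of 16 */,
             ppInstr_FOO, ppReg_FOO, mode64 );
}
#endif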
HInstrArray* doRegisterAllocation (

   /* Incoming virtual-registerised code. */
   HInstrArray* instrs_in,

   /* An array listing all the real registers the allocator may use,
      in no particular order. */
   HReg* available_real_regs,
   Int   n_available_real_regs,

   /* Return True iff the given insn is a reg-reg move, in which
      case also return the src and dst regs. */
   Bool (*isMove) ( HInstr*, HReg*, HReg* ),

   /* Get info about register usage in this insn. */
   void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),

   /* Apply a reg-reg mapping to an insn. */
   void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),

   /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
      real reg to a spill slot byte offset.  The two leading HInstr**
      args are out parameters, through which the generated insns are
      returned.  Also (optionally) a 'directReload' function, which
      attempts to replace a given instruction by one which reads
      directly from a specified spill slot.  May be NULL, in which
      case the optimisation is not attempted. */
   void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
   void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
   HInstr* (*directReload) ( HInstr*, HReg, Short ),
   Int     guest_sizeB,

   /* For debug printing only. */
   void (*ppInstr) ( HInstr*, Bool ),
   void (*ppReg) ( HReg ),

   /* 32/64bit mode */
   Bool mode64
)
{
#  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)

   const Bool eq_spill_opt = True;

   /* Iterators and temporaries. */
   Int       ii, j, k, m, spillee, k_suboptimal;
   HReg      rreg, vreg, vregS, vregD;
   HRegUsage reg_usage;

   /* Info on vregs and rregs.  Computed once and remains
      unchanged. */
   Int     n_vregs;
   VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */

   /* We keep two copies of the real-reg live range info, one sorted
      by .live_after and the other by .dead_before.  First the
      unsorted info is created in the _la variant and then copied into
      the _db variant.  Once that's done both of them are sorted.
      We also need two integer cursors which record the next
      location in the two arrays to consider. */
   RRegLR* rreg_lrs_la;
   RRegLR* rreg_lrs_db;
   Int     rreg_lrs_size;
   Int     rreg_lrs_used;
   Int     rreg_lrs_la_next;
   Int     rreg_lrs_db_next;

   /* Used when constructing vreg_lrs (for allocating stack
      slots). */
   Int ss_busy_until_before[N_SPILL64S];

   /* Used when constructing rreg_lrs. */
   Int* rreg_live_after;
   Int* rreg_dead_before;

   /* Running state of the core allocation algorithm. */
   RRegState* rreg_state;  /* [0 .. n_rregs-1] */
   Int        n_rregs;

   /* .. and the redundant backward map */
   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
      This implies n_rregs must be <= 32768. */
   Short*     vreg_state;  /* [0 .. n_vregs-1] */

   /* The vreg -> rreg map constructed and then applied to each
      instr. */
   HRegRemap remap;

   /* The output array of instructions. */
   HInstrArray* instrs_out;

   /* Sanity checks are expensive.  They are only done periodically,
      not at each insn processed. */
   Bool do_sanity_check;

   vassert(0 == (guest_sizeB % 16));
   vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
   vassert(0 == (N_SPILL64S % 2));

   /* The live range numbers are signed shorts, and so limiting the
      number of insns to 15000 comfortably guards against them
      overflowing 32k. */
   vassert(instrs_in->arr_used <= 15000);

#  define INVALID_INSTRNO (-2)

#  define EMIT_INSTR(_instr)                  \
      do {                                    \
        HInstr* _tmp = (_instr);              \
        if (DEBUG_REGALLOC) {                 \
           vex_printf("**  ");                \
           (*ppInstr)(_tmp, mode64);          \
           vex_printf("\n\n");                \
        }                                     \
        addHInstr ( instrs_out, _tmp );       \
      } while (0)

#   define PRINT_STATE                                             \
      do {                                                         \
         Int z, q;                                                 \
         for (z = 0; z < n_rregs; z++) {                           \
            vex_printf("  rreg_state[%2d] = ", z);                 \
            (*ppReg)(rreg_state[z].rreg);                          \
            vex_printf("  \t");                                    \
            switch (rreg_state[z].disp) {                          \
               case Free:    vex_printf("Free\n"); break;          \
               case Unavail: vex_printf("Unavail\n"); break;       \
               case Bound:   vex_printf("BoundTo ");               \
                             (*ppReg)(rreg_state[z].vreg);         \
                             vex_printf("\n"); break;              \
            }                                                      \
         }                                                         \
         vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
         q = 0;                                                    \
         for (z = 0; z < n_vregs; z++) {                           \
            if (vreg_state[z] == INVALID_RREG_NO)                  \
               continue;                                           \
            vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
            q++;                                                   \
            if (q > 0 && (q % 6) == 0)                             \
               vex_printf("\n    ");                               \
         }                                                         \
         vex_printf("\n");                                         \
      } while (0)


   /* --------- Stage 0: set up output array --------- */
   /* --------- and allocate/initialise running state. --------- */

   instrs_out = newHInstrArray();

   /* ... and initialise running state. */
   /* n_rregs is no more than a short name for n_available_real_regs. */
   n_rregs = n_available_real_regs;
   n_vregs = instrs_in->n_vregs;

   /* If this is not so, vreg_state entries will overflow. */
   vassert(n_vregs < 32767);

   rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
   vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));

   for (j = 0; j < n_rregs; j++) {
      rreg_state[j].rreg          = available_real_regs[j];
      rreg_state[j].has_hlrs      = False;
      rreg_state[j].disp          = Free;
      rreg_state[j].vreg          = INVALID_HREG;
      rreg_state[j].is_spill_cand = False;
      rreg_state[j].eq_spill_slot = False;
   }

   for (j = 0; j < n_vregs; j++)
      vreg_state[j] = INVALID_RREG_NO;


   /* --------- Stage 1: compute vreg live ranges. --------- */
   /* --------- Stage 2: compute rreg live ranges. --------- */

   /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */

   /* This is relatively simple, because (1) we only seek the complete
      end-to-end live range of each vreg, and are not interested in
      any holes in it, and (2) the vregs are conveniently numbered 0
      .. n_vregs-1, so we can just dump the results in a
      pre-allocated array. */

   vreg_lrs = NULL;
   if (n_vregs > 0)
      vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);

   for (j = 0; j < n_vregs; j++) {
      vreg_lrs[j].live_after     = INVALID_INSTRNO;
      vreg_lrs[j].dead_before    = INVALID_INSTRNO;
      vreg_lrs[j].spill_offset   = 0;
      vreg_lrs[j].spill_size     = 0;
      vreg_lrs[j].reg_class      = HRcINVALID;
   }

   /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */

   /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */

   /* This is more complex than Stage 1, because we need to compute
      exactly all the live ranges of all the allocatable real regs,
      and we don't know in advance how many there will be. */

   rreg_lrs_used = 0;
   rreg_lrs_size = 4;
   rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
   rreg_lrs_db = NULL; /* we'll create this later */

   /* We'll need to track live range start/end points separately for
      each rreg.  Sigh. */
   vassert(n_available_real_regs > 0);
   rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
   rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));

   for (j = 0; j < n_available_real_regs; j++) {
      rreg_live_after[j] =
      rreg_dead_before[j] = INVALID_INSTRNO;
   }

   /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */

   /* ------ start of ITERATE OVER INSNS ------ */

   for (ii = 0; ii < instrs_in->arr_used; ii++) {

      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );

#     if 0
      vex_printf("\n%d  stage1: ", ii);
      (*ppInstr)(instrs_in->arr[ii], mode64);
      vex_printf("\n");
      ppHRegUsage(&reg_usage);
#     endif

      /* ------ start of DEAL WITH VREG LIVE RANGES ------ */

      /* for each reg mentioned in the insn ... */
      for (j = 0; j < reg_usage.n_used; j++) {

         vreg = reg_usage.hreg[j];
         /* only interested in virtual registers right now. */
         if (!hregIsVirtual(vreg))
            continue;
         k = hregNumber(vreg);
         if (k < 0 || k >= n_vregs) {
            vex_printf("\n");
            (*ppInstr)(instrs_in->arr[ii], mode64);
            vex_printf("\n");
            vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
            vpanic("doRegisterAllocation: out-of-range vreg");
         }

         /* Take the opportunity to note its regclass.  We'll need
            that when allocating spill slots. */
         if (vreg_lrs[k].reg_class == HRcINVALID) {
            /* First mention of this vreg. */
            vreg_lrs[k].reg_class = hregClass(vreg);
         } else {
            /* Seen it before, so check for consistency. */
            vassert(vreg_lrs[k].reg_class == hregClass(vreg));
         }

         /* Now consider live ranges. */
         switch (reg_usage.mode[j]) {
            case HRmRead:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Read");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmWrite:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO)
                  vreg_lrs[k].live_after = toShort(ii);
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmModify:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Modify");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            default:
               vpanic("doRegisterAllocation(1)");
         } /* switch */

      } /* iterate over registers */

      /* ------ end of DEAL WITH VREG LIVE RANGES ------ */

      /* ------ start of DEAL WITH RREG LIVE RANGES ------ */

      /* for each reg mentioned in the insn ... */
      for (j = 0; j < reg_usage.n_used; j++) {

         /* Dummy initialisations of flush_la and flush_db to avoid
            possible bogus uninit-var warnings from gcc. */
         Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
         Bool flush;

         rreg = reg_usage.hreg[j];

         /* only interested in real registers right now. */
         if (hregIsVirtual(rreg))
            continue;

         /* Furthermore, we're not interested in this rreg unless it's
            one of the allocatable ones.  For example, it could be a
            stack pointer register, or some other register beyond our
            control, in which case we should just ignore it. */
         for (k = 0; k < n_available_real_regs; k++)
            if (sameHReg(available_real_regs[k], rreg))
               break;
         if (k == n_available_real_regs)
            continue; /* not found -- ignore. */
         flush = False;
         switch (reg_usage.mode[j]) {
            case HRmWrite:
               flush_la = rreg_live_after[k];
               flush_db = rreg_dead_before[k];
               if (flush_la != INVALID_INSTRNO
                   && flush_db != INVALID_INSTRNO)
                  flush = True;
               rreg_live_after[k]  = ii;
               rreg_dead_before[k] = ii+1;
               break;
            case HRmRead:
               if (rreg_live_after[k] == INVALID_INSTRNO) {
                  vex_printf("\nOFFENDING RREG = ");
                  (*ppReg)(available_real_regs[k]);
                  vex_printf("\n");
                  vex_printf("\nOFFENDING instr = ");
                  (*ppInstr)(instrs_in->arr[ii], mode64);
                  vex_printf("\n");
                  vpanic("doRegisterAllocation: "
                         "first event for rreg is Read");
               }
               rreg_dead_before[k] = ii+1;
               break;
            case HRmModify:
               if (rreg_live_after[k] == INVALID_INSTRNO) {
                  vex_printf("\nOFFENDING RREG = ");
                  (*ppReg)(available_real_regs[k]);
                  vex_printf("\n");
                  vex_printf("\nOFFENDING instr = ");
                  (*ppInstr)(instrs_in->arr[ii], mode64);
                  vex_printf("\n");
                  vpanic("doRegisterAllocation: "
                         "first event for rreg is Modify");
               }
               rreg_dead_before[k] = ii+1;
               break;
            default:
               vpanic("doRegisterAllocation(2)");
         }

         if (flush) {
            vassert(flush_la != INVALID_INSTRNO);
            vassert(flush_db != INVALID_INSTRNO);
            ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
            if (0)
               vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
            rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
            rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
            rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
            rreg_lrs_used++;
         }

      } /* iterate over regs in the instr */

      /* ------ end of DEAL WITH RREG LIVE RANGES ------ */

   } /* iterate over insns */

   /* ------ end of ITERATE OVER INSNS ------ */

   /* ------ start of FINALISE RREG LIVE RANGES ------ */

   /* Now finish up any live ranges left over. */
   for (j = 0; j < n_available_real_regs; j++) {

#     if 0
      vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
                                             rreg_dead_before[j]);
#     endif
      vassert( (rreg_live_after[j] == INVALID_INSTRNO
               && rreg_dead_before[j] == INVALID_INSTRNO)
              ||
              (rreg_live_after[j] != INVALID_INSTRNO
               && rreg_dead_before[j] != INVALID_INSTRNO)
            );

      if (rreg_live_after[j] == INVALID_INSTRNO)
         continue;

      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
      if (0)
         vex_printf("FLUSH 2 (%d,%d)\n",
                    rreg_live_after[j], rreg_dead_before[j]);
      rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
      rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
      rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
      rreg_lrs_used++;
   }

   /* Compute summary hints for choosing real regs.  If a real reg is
      involved in a hard live range, record that fact in the fixed
      part of the running rreg_state.  Later, when offered a choice between
      rregs, it's better to choose one which is not marked as having
      any HLRs, since ones with HLRs may need to be spilled around
      their HLRs.  Correctness of final assignment is unaffected by
      this mechanism -- it is only an optimisation. */

   for (j = 0; j < rreg_lrs_used; j++) {
      rreg = rreg_lrs_la[j].rreg;
      vassert(!hregIsVirtual(rreg));
      /* rreg is involved in a HLR.  Record this info in the array, if
         there is space. */
      for (k = 0; k < n_rregs; k++)
         if (sameHReg(rreg_state[k].rreg, rreg))
            break;
      vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
      rreg_state[k].has_hlrs = True;
   }
   if (0) {
      for (j = 0; j < n_rregs; j++) {
         if (!rreg_state[j].has_hlrs)
            continue;
         ppReg(rreg_state[j].rreg);
         vex_printf(" hinted\n");
      }
   }

   /* Finally, copy the _la variant into the _db variant and
      sort both by their respective fields. */
   rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
   for (j = 0; j < rreg_lrs_used; j++)
      rreg_lrs_db[j] = rreg_lrs_la[j];

   sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
   sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );

   /* And set up the cursors. */
   rreg_lrs_la_next = 0;
   rreg_lrs_db_next = 0;

   for (j = 1; j < rreg_lrs_used; j++) {
      vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
      vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
   }

   /* ------ end of FINALISE RREG LIVE RANGES ------ */

#  if DEBUG_REGALLOC
   for (j = 0; j < n_vregs; j++) {
      vex_printf("vreg %d:  la = %d,  db = %d\n",
                 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
   }
#  endif

#  if DEBUG_REGALLOC
   vex_printf("RRegLRs by LA:\n");
   for (j = 0; j < rreg_lrs_used; j++) {
      vex_printf("  ");
      (*ppReg)(rreg_lrs_la[j].rreg);
      vex_printf("      la = %d,  db = %d\n",
                 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
   }
   vex_printf("RRegLRs by DB:\n");
   for (j = 0; j < rreg_lrs_used; j++) {
      vex_printf("  ");
      (*ppReg)(rreg_lrs_db[j].rreg);
      vex_printf("      la = %d,  db = %d\n",
                 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
   }
#  endif

   /* --------- Stage 3: allocate spill slots. --------- */

   /* Each spill slot is 8 bytes long.  For vregs which take more than
      64 bits to spill (classes Flt64 and Vec128), we have to allocate
      two consecutive spill slots.  For 256 bit registers (class
      Vec256), we have to allocate four consecutive spill slots.

      For Vec128-class on PowerPC, the spill slot's actual address
      must be 16-byte aligned.  Since the spill slot's address is
      computed as an offset from the guest state pointer, and since
      the user of the generated code must set that pointer to a
      32-aligned value, we have the residual obligation here of
      choosing a 16-aligned spill slot offset for Vec128-class values.
      Since each spill slot is 8 bytes long, that means for
      Vec128-class values we must allocate a spill slot number which
      is zero mod 2.

      Similarly, for Vec256 class on amd64, find a spill slot number
      which is zero mod 4.  This guarantees it will be 32 byte
      aligned, which isn't actually necessary on amd64 (we use movUpd
      etc to spill), but seems like good practice.

      Do a rank-based allocation of vregs to spill slot numbers.  We
      put as few values as possible in spill slots, but nevertheless
      need to have a spill slot available for all vregs, just in case.
   */
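   /* Worked example of the 16-byte case (illustrative): guest_sizeB
      is asserted above to be a multiple of 16, so the spill area base
      guest_sizeB * 3 is itself 16-aligned.  Picking a slot number k
      with k % 2 == 0 makes k * 8 a multiple of 16, so the final
      offset guest_sizeB * 3 + k * 8 is 16-aligned, as Vec128-class
      spills on PowerPC require. */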
   /* max_ss_no = -1; */

   for (j = 0; j < N_SPILL64S; j++)
      ss_busy_until_before[j] = 0;

   for (j = 0; j < n_vregs; j++) {

      /* True iff this vreg is unused.  In which case we also expect
         that the reg_class field for it has not been set.  */
      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
         vassert(vreg_lrs[j].reg_class == HRcINVALID);
         continue;
      }

      /* The spill slots are 64 bits in size.  As per the comment on
         definition of HRegClass in host_generic_regs.h, that means,
         to spill a vreg of class Flt64 or Vec128, we'll need to find
         two adjacent spill slots to use.  For Vec256, we'll need to
         find four adjacent slots to use.  Note, this logic needs to
         be kept in sync with the size info on the definition of
         HRegClass. */
      switch (vreg_lrs[j].reg_class) {

         case HRcVec128: case HRcFlt64:
            /* Find two adjacent free slots which between them
               provide up to 128 bits in which to spill the vreg.
               Since we are trying to find an even:odd pair, move
               along in steps of 2 (slots). */
            for (k = 0; k < N_SPILL64S-1; k += 2)
               if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
                   && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
                  break;
            if (k >= N_SPILL64S-1) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            if (0) vex_printf("16-byte spill offset in spill slot %d\n",
                              (Int)k);
            ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
            ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
            break;

         default:
            /* The ordinary case -- just find a single spill slot. */
            /* Find the lowest-numbered spill slot which is available
               at the start point of this interval, and assign the
               interval to it. */
            for (k = 0; k < N_SPILL64S; k++)
               if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
                  break;
            if (k == N_SPILL64S) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            ss_busy_until_before[k] = vreg_lrs[j].dead_before;
            break;

      } /* switch (vreg_lrs[j].reg_class) { */

      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
         layout: the guest state, then two equal sized areas following
         it for two sets of shadow state, and then the spill area. */
      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);

      /* Independent check that we've made a sane choice of slot */
      sanity_check_spill_offset( &vreg_lrs[j] );
      /* if (j > max_ss_no) */
      /*    max_ss_no = j; */
   }

#  if 0
   vex_printf("\n\n");
   for (j = 0; j < n_vregs; j++)
      vex_printf("vreg %d    --> spill offset %d\n",
                 j, vreg_lrs[j].spill_offset);
#  endif

   /* --------- Stage 4: establish rreg preferences --------- */

   /* It may be advantageous to allocate certain vregs to specific
      rregs, as a way of avoiding reg-reg moves later.  Here we
      establish which, if any, rreg each vreg would prefer to be in.
      Note that this constrains the allocator -- ideally we end up
      with as few as possible vregs expressing a preference.

      This is an optimisation: if the .preferred_rreg field is never
      set to anything different from INVALID_HREG, the allocator still
      works. */

   /* 30 Dec 04: removed this mechanism as it does not seem to
      help. */

   /* --------- Stage 5: process instructions --------- */

   /* This is the main loop of the allocator.  First, we need to
      correctly set up our running state, which tracks the status of
      each real register. */

   /* ------ BEGIN: Process each insn in turn. ------ */

   for (ii = 0; ii < instrs_in->arr_used; ii++) {

#     if DEBUG_REGALLOC
      vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
      vex_printf("---- ");
      (*ppInstr)(instrs_in->arr[ii], mode64);
      vex_printf("\n\nInitial state:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif

      /* ------------ Sanity checks ------------ */

      /* Sanity checks are expensive.  So they are done only once
         every 7 instructions, and just before the last
         instruction. */
      do_sanity_check
         = toBool(
              False  /* Set to True for sanity checking of all insns. */
              || ii == instrs_in->arr_used-1
              || (ii > 0 && (ii % 7) == 0)
           );

      if (do_sanity_check) {

         /* Sanity check 1: all rregs with a hard live range crossing
            this insn must be marked as unavailable in the running
            state. */
         for (j = 0; j < rreg_lrs_used; j++) {
            if (rreg_lrs_la[j].live_after < ii
                && ii < rreg_lrs_la[j].dead_before) {
               /* ii is the middle of a hard live range for some real
                  reg.  Check it's marked as such in the running
                  state. */

#              if 0
               vex_printf("considering la %d .. db %d   reg = ",
                          rreg_lrs_la[j].live_after,
                          rreg_lrs_la[j].dead_before);
               (*ppReg)(rreg_lrs_la[j].rreg);
               vex_printf("\n");
#              endif

               /* find the state entry for this rreg */
               for (k = 0; k < n_rregs; k++)
                  if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[j].rreg))
                     break;

               /* and assert that this rreg is marked as unavailable */
               vassert(rreg_state[k].disp == Unavail);
            }
         }

         /* Sanity check 2: conversely, all rregs marked as
            unavailable in the running rreg_state must have a
            corresponding hard live range entry in the rreg_lrs
            array. */
         for (j = 0; j < n_available_real_regs; j++) {
            vassert(rreg_state[j].disp == Bound
                    || rreg_state[j].disp == Free
                    || rreg_state[j].disp == Unavail);
            if (rreg_state[j].disp != Unavail)
               continue;
            for (k = 0; k < rreg_lrs_used; k++)
               if (sameHReg(rreg_lrs_la[k].rreg, rreg_state[j].rreg)
                   && rreg_lrs_la[k].live_after < ii
                   && ii < rreg_lrs_la[k].dead_before)
                  break;
            /* If this vassertion fails, we couldn't find a
               corresponding HLR. */
            vassert(k < rreg_lrs_used);
         }

         /* Sanity check 3: all vreg-rreg bindings must bind registers
            of the same class. */
         for (j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound) {
               vassert(rreg_state[j].eq_spill_slot == False);
               continue;
            }
            vassert(hregClass(rreg_state[j].rreg)
                    == hregClass(rreg_state[j].vreg));
            vassert( hregIsVirtual(rreg_state[j].vreg));
            vassert(!hregIsVirtual(rreg_state[j].rreg));
         }

         /* Sanity check 4: the vreg_state and rreg_state
            mutually-redundant mappings are consistent.  If
            rreg_state[j].vreg points at some vreg_state entry then
            that vreg_state entry should point back at
            rreg_state[j]. */
         for (j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound)
               continue;
            k = hregNumber(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(k));
            vassert(vreg_state[k] == j);
         }
         for (j = 0; j < n_vregs; j++) {
            k = vreg_state[j];
            if (k == INVALID_RREG_NO)
               continue;
            vassert(IS_VALID_RREGNO(k));
            vassert(rreg_state[k].disp == Bound);
            vassert(hregNumber(rreg_state[k].vreg) == j);
         }

      } /* if (do_sanity_check) */

      /* ------------ end of Sanity checks ------------ */

      /* Do various optimisations pertaining to register coalescing
         and preferencing:
            MOV  v <-> v   coalescing (done here).
            MOV  v <-> r   coalescing (not yet, if ever)
      */
      /* If doing a reg-reg move between two vregs, and the src's live
         range ends here and the dst's live range starts here, bind
         the dst to the src's rreg, and that's all. */
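      /* Illustrative example: for a move v5 -> v9 at insn 12,
         coalescing is allowed only if v5 dies here (dead_before ==
         13) and v9 becomes live here (live_after == 12).  v9 then
         simply inherits whatever rreg v5 was bound to, and the move
         instruction itself is dropped from the output. */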
      if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
         /* Check that *isMove is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         k = hregNumber(vregS);
         m = hregNumber(vregD);
         vassert(IS_VALID_VREGNO(k));
         vassert(IS_VALID_VREGNO(m));
         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
#        if DEBUG_REGALLOC
         vex_printf("COALESCE ");
         (*ppReg)(vregS);
         vex_printf(" -> ");
         (*ppReg)(vregD);
         vex_printf("\n\n");
#        endif
         /* Find the state entry for vregS. */
         for (m = 0; m < n_rregs; m++)
            if (rreg_state[m].disp == Bound
                && sameHReg(rreg_state[m].vreg, vregS))
               break;
         if (m == n_rregs)
            /* We failed to find a binding for vregS, which means it's
               currently not in a register.  So we can't do the
               coalescing.  Give up. */
            goto cannot_coalesce;

         /* Finally, we can do the coalescing.  It's trivial -- merely
            claim vregS's register for vregD. */
         rreg_state[m].vreg = vregD;
         vassert(IS_VALID_VREGNO(hregNumber(vregD)));
         vassert(IS_VALID_VREGNO(hregNumber(vregS)));
         vreg_state[hregNumber(vregD)] = toShort(m);
         vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;

         /* This rreg has become associated with a different vreg and
            hence with a different spill slot.  Play safe. */
         rreg_state[m].eq_spill_slot = False;

         /* Move on to the next insn.  We skip the post-insn stuff for
            fixed registers, since this move should not interact with
            them in any way. */
         continue;
      }
     cannot_coalesce:

      /* ------ Free up rregs bound to dead vregs ------ */

      /* Look for vregs whose live range has just ended, and
         mark the associated rreg as free. */

      for (j = 0; j < n_rregs; j++) {
         if (rreg_state[j].disp != Bound)
            continue;
         UInt vregno = hregNumber(rreg_state[j].vreg);
         vassert(IS_VALID_VREGNO(vregno));
         if (vreg_lrs[vregno].dead_before <= ii) {
            rreg_state[j].disp = Free;
            rreg_state[j].eq_spill_slot = False;
            m = hregNumber(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (DEBUG_REGALLOC) {
               vex_printf("free up ");
               (*ppReg)(rreg_state[j].rreg);
               vex_printf("\n");
            }
         }
      }

      /* ------ Pre-instruction actions for fixed rreg uses ------ */

      /* Now we have to deal with rregs which are about to be made
         live by this instruction -- in other words, are entering into
         one of their live ranges.  If any such rreg holds a vreg, we
         will have to free up the rreg.  The simplest solution which
         is correct is to spill the rreg.

         Note we could do better:
         * Could move it into some other free rreg, if one is available

         Do this efficiently, by incrementally stepping along an array
         of rreg HLRs that are known to be sorted by start point
         (their .live_after field).
      */
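      /* Illustrative example: suppose the current insn is a call
         whose fixed return-value register has a hard live range
         starting here, and that rreg is currently Bound to a
         still-live vreg.  The vreg is then spilled to its home slot
         (unless eq_spill_slot already says the slot is up to date)
         and the rreg is marked Unavail until the hard live range
         ends. */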
      while (True) {
         vassert(rreg_lrs_la_next >= 0);
         vassert(rreg_lrs_la_next <= rreg_lrs_used);
         if (rreg_lrs_la_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
            break; /* next live range does not yet start */
         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
            Find the associated rreg_state entry. */
         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
            Real register live ranges are guaranteed to be well-formed
            in that they start with a write to the register -- Stage 2
            rejects any code not satisfying this.  So the correct
            question to ask is whether
            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
            whether the reg becomes live after this insn -- rather
            than before it. */
#        if DEBUG_REGALLOC
         vex_printf("need to free up rreg: ");
         (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
         vex_printf("\n\n");
#        endif
         for (k = 0; k < n_rregs; k++)
            if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[rreg_lrs_la_next].rreg))
               break;
         /* If this fails, we don't have an entry for this rreg.
            Which we should. */
         vassert(IS_VALID_RREGNO(k));
         m = hregNumber(rreg_state[k].vreg);
         if (rreg_state[k].disp == Bound) {
            /* Yes, there is an associated vreg.  Spill it if it's
               still live. */
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (vreg_lrs[m].dead_before > ii) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
                  HInstr* spill1 = NULL;
                  HInstr* spill2 = NULL;
                  (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
                               vreg_lrs[m].spill_offset, mode64 );
                  vassert(spill1 || spill2); /* can't both be NULL */
                  if (spill1)
                     EMIT_INSTR(spill1);
                  if (spill2)
                     EMIT_INSTR(spill2);
               }
               rreg_state[k].eq_spill_slot = True;
            }
         }
         rreg_state[k].disp = Unavail;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs entering HLRs at this point */
         rreg_lrs_la_next++;
      }


#     if DEBUG_REGALLOC
      vex_printf("After pre-insn actions for fixed regs:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif


      /* ------ Deal with the current instruction. ------ */

      /* Finally we can begin the processing of this instruction
         itself.  The aim is to free up enough rregs for this insn.
         This may generate spill stores since we may have to evict
         some vregs currently in rregs.  Also generates spill loads.
         We also build up the final vreg->rreg mapping to be applied
         to the insn. */

      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );

      initHRegRemap(&remap);

      /* ------------ BEGIN directReload optimisation ----------- */

      /* If the instruction reads exactly one vreg which is currently
         in a spill slot, and this is the last use of that vreg, see if
         we can convert the instruction into one which reads directly
         from the spill slot.  This is clearly only possible for x86
         and amd64 targets, since ppc and arm are load-store
         architectures.  If
         successful, replace instrs_in->arr[ii] with this new
         instruction, and recompute its reg usage, so that the change
         is invisible to the standard-case handling that follows. */
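      /* Hypothetical illustration: an x86-style "add v7, v9" where v7
         is spilled at offset 56 and this is v7's last use could be
         rewritten by directReload into an add whose source operand is
         the spill slot at offset 56 from the guest state pointer,
         avoiding a reload into a scratch rreg. */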

      if (directReload && reg_usage.n_used <= 2) {
         Bool  debug_direct_reload = True && False;
         HReg  cand     = INVALID_HREG;
         Int   nreads   = 0;
         Short spilloff = 0;

         for (j = 0; j < reg_usage.n_used; j++) {

            vreg = reg_usage.hreg[j];

            if (!hregIsVirtual(vreg))
               continue;

            if (reg_usage.mode[j] == HRmRead) {
               nreads++;
               m = hregNumber(vreg);
               vassert(IS_VALID_VREGNO(m));
               k = vreg_state[m];
               if (!IS_VALID_RREGNO(k)) {
                  /* ok, it is spilled.  Now, is this its last use? */
                  vassert(vreg_lrs[m].dead_before >= ii+1);
                  if (vreg_lrs[m].dead_before == ii+1
                      && hregIsInvalid(cand)) {
                     spilloff = vreg_lrs[m].spill_offset;
                     cand = vreg;
                  }
               }
            }
         }

         if (nreads == 1 && ! hregIsInvalid(cand)) {
            HInstr* reloaded;
            if (reg_usage.n_used == 2)
               vassert(! sameHReg(reg_usage.hreg[0], reg_usage.hreg[1]));

            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
            if (debug_direct_reload && !reloaded) {
               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
               ppInstr(instrs_in->arr[ii], mode64);
            }
            if (reloaded) {
               /* Update info about the insn, so it looks as if it had
                  been in this form all along. */
               instrs_in->arr[ii] = reloaded;
               (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
               if (debug_direct_reload) {
                  vex_printf("  -->  ");
                  ppInstr(reloaded, mode64);
               }
            }

            if (debug_direct_reload && !reloaded)
               vex_printf("\n");
         }

      }

      /* ------------ END directReload optimisation ------------ */

      /* for each reg mentioned in the insn ... */
      for (j = 0; j < reg_usage.n_used; j++) {

         vreg = reg_usage.hreg[j];

         /* only interested in virtual registers right now. */
         if (!hregIsVirtual(vreg))
            continue;

#        if 0
         vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
#        endif

         /* Now we're trying to find a rreg for "vreg".  First of all,
            if it already has an rreg assigned, we don't need to do
            anything more.  Search the current state to find out. */
         m = hregNumber(vreg);
         vassert(IS_VALID_VREGNO(m));
         k = vreg_state[m];
         if (IS_VALID_RREGNO(k)) {
            vassert(rreg_state[k].disp == Bound);
            addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
            /* If this rreg is written or modified, mark it as different
               from any spill slot value. */
            if (reg_usage.mode[j] != HRmRead)
               rreg_state[k].eq_spill_slot = False;
            continue;
         } else {
            vassert(k == INVALID_RREG_NO);
         }

         /* No luck.  The next thing to do is see if there is a
            currently free rreg available, of the correct class.  If
            so, bag it.  NOTE, we could improve this by selecting an
            rreg for which the next live-range event is as far ahead
            as possible. */
         k_suboptimal = -1;
         for (k = 0; k < n_rregs; k++) {
            if (rreg_state[k].disp != Free
                || hregClass(rreg_state[k].rreg) != hregClass(vreg))
               continue;
            if (rreg_state[k].has_hlrs) {
               /* Well, at least we can use k_suboptimal if we really
                  have to.  Keep on looking for a better candidate. */
               k_suboptimal = k;
            } else {
               /* Found a preferable reg.  Use it. */
               k_suboptimal = -1;
               break;
            }
         }
         if (k_suboptimal >= 0)
            k = k_suboptimal;

         if (k < n_rregs) {
            rreg_state[k].disp = Bound;
            rreg_state[k].vreg = vreg;
            m = hregNumber(vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = toShort(k);
            addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
            /* Generate a reload if needed.  This only creates needed
               reloads because the live range builder for vregs will
               guarantee that the first event for a vreg is a write.
               Hence, if this reference is not a write, it cannot be
               the first reference for this vreg, and so a reload is
               indeed needed. */
            if (reg_usage.mode[j] != HRmWrite) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               HInstr* reload1 = NULL;
               HInstr* reload2 = NULL;
               (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
                             vreg_lrs[m].spill_offset, mode64 );
               vassert(reload1 || reload2); /* can't both be NULL */
               if (reload1)
                  EMIT_INSTR(reload1);
               if (reload2)
                  EMIT_INSTR(reload2);
               /* This rreg is read or modified by the instruction.
                  If it's merely read we can claim it now equals the
                  spill slot, but not so if it is modified. */
               if (reg_usage.mode[j] == HRmRead) {
                  rreg_state[k].eq_spill_slot = True;
               } else {
                  vassert(reg_usage.mode[j] == HRmModify);
                  rreg_state[k].eq_spill_slot = False;
               }
            } else {
               rreg_state[k].eq_spill_slot = False;
            }

            continue;
         }

         /* Well, now we have no option but to spill a vreg.  It's
            important to make a good choice of vreg to spill, and of
            course we need to be careful not to spill a vreg which is
            needed by this insn. */

         /* First, mark in the rreg_state, those rregs which are not spill
            candidates, due to holding a vreg mentioned by this
            instruction.  Or being of the wrong class. */
         for (k = 0; k < n_rregs; k++) {
            rreg_state[k].is_spill_cand = False;
            if (rreg_state[k].disp != Bound)
               continue;
            if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
               continue;
            rreg_state[k].is_spill_cand = True;
            for (m = 0; m < reg_usage.n_used; m++) {
               if (sameHReg(rreg_state[k].vreg, reg_usage.hreg[m])) {
                  rreg_state[k].is_spill_cand = False;
                  break;
               }
            }
         }

         /* We can choose to spill any rreg satisfying
            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
            the next use of its associated vreg is as far ahead as
            possible, in the hope that this will minimise the number
            of consequent reloads required. */
         spillee
            = findMostDistantlyMentionedVReg (
                 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );

         if (spillee == -1) {
            /* Hmmmmm.  There don't appear to be any spill candidates.
               We're hosed. */
            vex_printf("reg_alloc: can't find a register in class: ");
            ppHRegClass(hregClass(vreg));
            vex_printf("\n");
            vpanic("reg_alloc: can't create a free register.");
         }

         /* Right.  So we're going to spill rreg_state[spillee]. */
         vassert(IS_VALID_RREGNO(spillee));
         vassert(rreg_state[spillee].disp == Bound);
         /* check it's the right class */
         vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
         /* check we're not ejecting the vreg for which we are trying
            to free up a register. */
         vassert(! sameHReg(rreg_state[spillee].vreg, vreg));

         m = hregNumber(rreg_state[spillee].vreg);
         vassert(IS_VALID_VREGNO(m));

         /* So here's the spill store.  Assert that we're spilling a
            live vreg. */
         vassert(vreg_lrs[m].dead_before > ii);
         vassert(vreg_lrs[m].reg_class != HRcINVALID);
         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
            HInstr* spill1 = NULL;
            HInstr* spill2 = NULL;
            (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
                         vreg_lrs[m].spill_offset, mode64 );
            vassert(spill1 || spill2); /* can't both be NULL */
            if (spill1)
               EMIT_INSTR(spill1);
            if (spill2)
               EMIT_INSTR(spill2);
         }

         /* Update the rreg_state to reflect the new assignment for this
            rreg. */
         rreg_state[spillee].vreg = vreg;
         vreg_state[m] = INVALID_RREG_NO;

         rreg_state[spillee].eq_spill_slot = False; /* be safe */

         m = hregNumber(vreg);
         vassert(IS_VALID_VREGNO(m));
         vreg_state[m] = toShort(spillee);

         /* Now, if this vreg is being read or modified (as opposed to
            written), we have to generate a reload for it. */
         if (reg_usage.mode[j] != HRmWrite) {
            vassert(vreg_lrs[m].reg_class != HRcINVALID);
            HInstr* reload1 = NULL;
            HInstr* reload2 = NULL;
            (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
                          vreg_lrs[m].spill_offset, mode64 );
            vassert(reload1 || reload2); /* can't both be NULL */
            if (reload1)
               EMIT_INSTR(reload1);
            if (reload2)
               EMIT_INSTR(reload2);
            /* This rreg is read or modified by the instruction.
               If it's merely read we can claim it now equals the
               spill slot, but not so if it is modified. */
            if (reg_usage.mode[j] == HRmRead) {
               rreg_state[spillee].eq_spill_slot = True;
            } else {
               vassert(reg_usage.mode[j] == HRmModify);
               rreg_state[spillee].eq_spill_slot = False;
            }
         }

         /* So after much twisting and turning, we have vreg mapped to
            rreg_state[spillee].rreg.  Note that in the map. */
         addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);

      } /* iterate over registers in this instruction. */

      /* We've finished clowning around with registers in this instruction.
         Three results:
         - the running rreg_state[] has been updated
         - a suitable vreg->rreg mapping for this instruction has been
           constructed
         - spill and reload instructions may have been emitted.

        The final step is to apply the mapping to the instruction,
        and emit that.
      */

      /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
      (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
      EMIT_INSTR( instrs_in->arr[ii] );

#     if DEBUG_REGALLOC
      vex_printf("After dealing with current insn:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif

      /* ------ Post-instruction actions for fixed rreg uses ------ */

      /* Now we need to check for rregs exiting fixed live ranges
         after this instruction, and if so mark them as free. */
      while (True) {
         vassert(rreg_lrs_db_next >= 0);
         vassert(rreg_lrs_db_next <= rreg_lrs_used);
         if (rreg_lrs_db_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
            break; /* next live range has not yet ended */
         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
            range.  Mark it as such in the main rreg_state array. */
         for (k = 0; k < n_rregs; k++)
            if (sameHReg(rreg_state[k].rreg, rreg_lrs_db[rreg_lrs_db_next].rreg))
               break;
         /* If this vassertion fails, we don't have an entry for
            this rreg.  Which we should. */
         vassert(k < n_rregs);
         vassert(rreg_state[k].disp == Unavail);
         rreg_state[k].disp = Free;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs leaving HLRs at this point */
         rreg_lrs_db_next++;
      }

#     if DEBUG_REGALLOC
      vex_printf("After post-insn actions for fixed regs:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif

   } /* iterate over insns */

   /* ------ END: Process each insn in turn. ------ */

   /* free(rreg_state); */
   /* free(rreg_lrs); */
   /* if (vreg_lrs) free(vreg_lrs); */

   /* Paranoia */
   for (j = 0; j < n_rregs; j++)
      vassert(sameHReg(rreg_state[j].rreg, available_real_regs[j]));

   vassert(rreg_lrs_la_next == rreg_lrs_used);
   vassert(rreg_lrs_db_next == rreg_lrs_used);

   return instrs_out;

#  undef INVALID_INSTRNO
#  undef EMIT_INSTR
#  undef PRINT_STATE
}



/*---------------------------------------------------------------*/
/*---                                       host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/
