
/*---------------------------------------------------------------*/
/*--- begin                                 host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"

#include "main_util.h"
#include "host_generic_regs.h"

/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0


/* TODO 27 Oct 04:

   Better consistency checking from what isMove tells us.

   We can possibly do V-V coalescing even when the src is spilled,
   providing we can arrange for the dst to have the same spill slot.

   Note that state[].hreg is the same as the available real regs.

   Generally rationalise data structures.  */


/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Short spill_offset;
      Short spill_size;
      /* What kind of register this is. */
      HRegClass reg_class;
   }
   VRegLR;
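
/* Example of the conventions used just above: a vreg which is first
   written by insn 3 and last read by insn 7 gets live_after == 3 and
   dead_before == 8, since Stage 1 below sets dead_before to the index
   of the reading insn plus one. */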


/* Records information on real-register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;


/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions. */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Which rreg is this for? */
      HReg rreg;
      /* Is this involved in any HLRs?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;
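
/* Typical disposition transitions, as performed by the main loop in
   doRegisterAllocation below: Free -> Bound when a vreg is assigned
   to the rreg; Bound -> Free when that vreg's live range ends;
   Free/Bound -> Unavail (spilling any bound vreg first) when the rreg
   enters one of its own hard live ranges; Unavail -> Free when that
   hard live range ends. */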


/* The allocator also maintains a redundant array of indexes
   (vreg_state) from vreg numbers back to entries in rreg_state.  It
   is redundant because iff vreg_state[i] == j then
   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
   point at each other.  The purpose of this is to speed up activities
   which involve looking for a particular vreg: there is no need to
   scan the rreg_state looking for it, just index directly into
   vreg_state.  The FAQ "does this vreg already have an associated
   rreg" is the main beneficiary.

   To indicate, in vreg_state[i], that a given vreg is not currently
   associated with any rreg, that entry can be set to INVALID_RREG_NO.

   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
*/
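
/* For example, if vreg 5 currently lives in the rreg described by
   rreg_state[2], then vreg_state[5] == 2, rreg_state[2].disp == Bound
   and hregNumber(rreg_state[2].vreg) == 5.  If instead vreg 5 has no
   rreg (it is dead, or its value sits only in its spill slot), then
   vreg_state[5] == INVALID_RREG_NO. */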

#define INVALID_RREG_NO ((Short)(-1))

#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)


/* Does this instruction mention a particular reg? */
static Bool instrMentionsReg (
   void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
   HInstr* instr,
   HReg r,
   Bool mode64
)
{
   Int       i;
   HRegUsage reg_usage;
   (*getRegUsage)(&reg_usage, instr, mode64);
   for (i = 0; i < reg_usage.n_used; i++)
      if (reg_usage.hreg[i] == r)
         return True;
   return False;
}
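
/* Note on cost: this does one getRegUsage query per call.  Its only
   caller, findMostDistantlyMentionedVReg below, may therefore perform
   O(spill candidates * remaining insns) queries in the worst case,
   which is tolerable since it only runs when a spill is unavoidable. */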


/* Search forward from some given point in the incoming instruction
   sequence.  Point is to select a virtual register to spill, by
   finding the vreg which is mentioned as far ahead as possible, in
   the hope that this will minimise the number of consequent reloads.

   Only do the search for vregs which are Bound in the running state,
   and for which the .is_spill_cand field is set.  This allows the
   caller to arbitrarily restrict the set of spill candidates to be
   considered.

   Returns an index into the state array indicating the (v,r) pair to
   spill, or -1 if none was found.  */
static
Int findMostDistantlyMentionedVReg (
   void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
   HInstrArray* instrs_in,
   Int          search_from_instr,
   RRegState*   state,
   Int          n_state,
   Bool         mode64
)
{
   Int k, m;
   Int furthest_k = -1;
   Int furthest   = -1;
   vassert(search_from_instr >= 0);
   for (k = 0; k < n_state; k++) {
      if (!state[k].is_spill_cand)
         continue;
      vassert(state[k].disp == Bound);
      for (m = search_from_instr; m < instrs_in->arr_used; m++) {
         if (instrMentionsReg(getRegUsage,
                              instrs_in->arr[m], state[k].vreg, mode64))
            break;
      }
      if (m > furthest) {
         furthest   = m;
         furthest_k = k;
      }
   }
   return furthest_k;
}


/* Check that this vreg has been assigned a sane spill offset. */
static inline void sanity_check_spill_offset ( VRegLR* vreg )
{
   switch (vreg->reg_class) {
      case HRcVec128: case HRcFlt64:
         vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
      default:
         vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
   }
}


/* Double the size of the real-reg live-range array, if needed. */
static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
{
   Int     k;
   RRegLR* arr2;
   if (used < *size) return;
   if (0)
      vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
   vassert(used == *size);
   arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
   for (k = 0; k < *size; k++)
      arr2[k] = (*info)[k];
   *size *= 2;
   *info = arr2;
}


/* Sort an array of RRegLR entries by either the .live_after or
   .dead_before fields.  This is performance-critical. */
static void sortRRLRarray ( RRegLR* arr,
                            Int size, Bool by_live_after )
{
   Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
                       9841, 29524, 88573, 265720,
                       797161, 2391484 };
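   /* This is a shellsort.  incs[] is the gap sequence h(n+1) = 3*h(n)+1
      (1, 4, 13, 40, ...).  Any prefix of it ending in 1 gives a correct
      sort; the larger entries only matter for speed on big arrays. */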
   Int    lo = 0;
   Int    hi = size-1;
   Int    i, j, h, bigN, hp;
   RRegLR v;

   vassert(size >= 0);
   if (size == 0)
      return;

   bigN = hi - lo + 1; if (bigN < 2) return;
   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;

   if (by_live_after) {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].live_after > v.live_after) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   } else {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].dead_before > v.dead_before) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   }
}


/* A target-independent register allocator.  Requires various
   functions which it uses to deal abstractly with instructions and
   registers, since it cannot have any target-specific knowledge.

   Returns a new list of instructions, which, as a result of the
   behaviour of mapRegs, will be in-place modifications of the
   original instructions.

   Requires that the incoming code has been generated using
   vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
   that range is a checked run-time error.

   Takes an expandable array of pointers to unallocated insns.
   Returns an expandable array of pointers to allocated insns.
*/
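
/* A minimal sketch of a call site, for orientation only -- it is not
   part of this file.  The Foo* names are placeholders for whatever a
   real backend (x86, amd64, ppc, arm) supplies via main_main.c; the
   casts mirror the parameter types declared below.

      HReg         rregs[64];     // filled with the allocatable rregs
      Int          n_rregs;       // how many of rregs[] are valid
      HInstrArray* vcode;         // virtual-registerised code
      ...
      HInstrArray* rcode
         = doRegisterAllocation(
              vcode, rregs, n_rregs,
              (Bool(*)(HInstr*,HReg*,HReg*))             isMove_FooInstr,
              (void(*)(HRegUsage*,HInstr*,Bool))         getRegUsage_FooInstr,
              (void(*)(HRegRemap*,HInstr*,Bool))         mapRegs_FooInstr,
              (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_Foo,
              (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_Foo,
              NULL,                    // no directReload optimisation
              guest_sizeB,
              (void(*)(HInstr*,Bool))  ppFooInstr,
              (void(*)(HReg))          ppHRegFoo,
              mode64 );
*/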
HInstrArray* doRegisterAllocation (

   /* Incoming virtual-registerised code. */
   HInstrArray* instrs_in,

   /* An array listing all the real registers the allocator may use,
      in no particular order. */
   HReg* available_real_regs,
   Int   n_available_real_regs,

   /* Return True iff the given insn is a reg-reg move, in which
      case also return the src and dst regs. */
   Bool (*isMove) ( HInstr*, HReg*, HReg* ),

   /* Get info about register usage in this insn. */
   void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),

   /* Apply a reg-reg mapping to an insn. */
   void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),

   /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
      real reg to a spill slot byte offset.  The two leading HInstr**
      args are out parameters, through which the generated insns are
      returned.  Also (optionally) a 'directReload' function, which
      attempts to replace a given instruction by one which reads
      directly from a specified spill slot.  May be NULL, in which
      case the optimisation is not attempted. */
   void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
   void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
   HInstr* (*directReload) ( HInstr*, HReg, Short ),
   Int     guest_sizeB,

   /* For debug printing only. */
   void (*ppInstr) ( HInstr*, Bool ),
   void (*ppReg) ( HReg ),

   /* 32/64bit mode */
   Bool mode64
)
{
#  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)

   const Bool eq_spill_opt = True;

   /* Iterators and temporaries. */
   Int       ii, j, k, m, spillee, k_suboptimal;
   HReg      rreg, vreg, vregS, vregD;
   HRegUsage reg_usage;

   /* Info on vregs and rregs.  Computed once and remains
      unchanged. */
   Int     n_vregs;
   VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */

   /* We keep two copies of the real-reg live range info, one sorted
      by .live_after and the other by .dead_before.  First the
      unsorted info is created in the _la variant, then copied into
      the _db variant.  Once that's done, both of them are sorted.
      We also need two integer cursors which record the next
      location in the two arrays to consider. */
   RRegLR* rreg_lrs_la;
   RRegLR* rreg_lrs_db;
   Int     rreg_lrs_size;
   Int     rreg_lrs_used;
   Int     rreg_lrs_la_next;
   Int     rreg_lrs_db_next;

   /* Used when constructing vreg_lrs (for allocating stack
      slots). */
   Int ss_busy_until_before[N_SPILL64S];

   /* Used when constructing rreg_lrs. */
   Int* rreg_live_after;
   Int* rreg_dead_before;

   /* Running state of the core allocation algorithm. */
   RRegState* rreg_state;  /* [0 .. n_rregs-1] */
   Int        n_rregs;

   /* .. and the redundant backward map */
   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
      This implies n_rregs must be <= 32768. */
   Short*     vreg_state;  /* [0 .. n_vregs-1] */

   /* The vreg -> rreg map constructed and then applied to each
      instr. */
   HRegRemap remap;

   /* The output array of instructions. */
   HInstrArray* instrs_out;

   /* Sanity checks are expensive.  They are only done periodically,
      not at each insn processed. */
   Bool do_sanity_check;

   vassert(0 == (guest_sizeB % 32));
   vassert(0 == (LibVEX_N_SPILL_BYTES % 32));
   vassert(0 == (N_SPILL64S % 4));

   /* The live range numbers are signed shorts, and so limiting the
      number of insns to 15000 comfortably guards against them
      overflowing 32k. */
   vassert(instrs_in->arr_used <= 15000);

#  define INVALID_INSTRNO (-2)

#  define EMIT_INSTR(_instr)                  \
      do {                                    \
        HInstr* _tmp = (_instr);              \
        if (DEBUG_REGALLOC) {                 \
           vex_printf("**  ");                \
           (*ppInstr)(_tmp, mode64);          \
           vex_printf("\n\n");                \
        }                                     \
        addHInstr ( instrs_out, _tmp );       \
      } while (0)

#   define PRINT_STATE                                             \
      do {                                                         \
         Int z, q;                                                 \
         for (z = 0; z < n_rregs; z++) {                           \
            vex_printf("  rreg_state[%2d] = ", z);                 \
            (*ppReg)(rreg_state[z].rreg);                          \
            vex_printf("  \t");                                    \
            switch (rreg_state[z].disp) {                          \
               case Free:    vex_printf("Free\n"); break;          \
               case Unavail: vex_printf("Unavail\n"); break;       \
               case Bound:   vex_printf("BoundTo ");               \
                             (*ppReg)(rreg_state[z].vreg);         \
                             vex_printf("\n"); break;              \
            }                                                      \
         }                                                         \
         vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
         q = 0;                                                    \
         for (z = 0; z < n_vregs; z++) {                           \
            if (vreg_state[z] == INVALID_RREG_NO)                  \
               continue;                                           \
            vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
            q++;                                                   \
            if (q > 0 && (q % 6) == 0)                             \
               vex_printf("\n    ");                               \
         }                                                         \
         vex_printf("\n");                                         \
      } while (0)


   /* --------- Stage 0: set up output array --------- */
   /* --------- and allocate/initialise running state. --------- */

   instrs_out = newHInstrArray();

   /* ... and initialise running state. */
   /* n_rregs is no more than a short name for n_available_real_regs. */
   n_rregs = n_available_real_regs;
   n_vregs = instrs_in->n_vregs;

   /* If this is not so, vreg_state entries will overflow. */
   vassert(n_vregs < 32767);

   rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
   vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));

   for (j = 0; j < n_rregs; j++) {
      rreg_state[j].rreg          = available_real_regs[j];
      rreg_state[j].has_hlrs      = False;
      rreg_state[j].disp          = Free;
      rreg_state[j].vreg          = INVALID_HREG;
      rreg_state[j].is_spill_cand = False;
      rreg_state[j].eq_spill_slot = False;
   }

   for (j = 0; j < n_vregs; j++)
      vreg_state[j] = INVALID_RREG_NO;


   /* --------- Stage 1: compute vreg live ranges. --------- */
   /* --------- Stage 2: compute rreg live ranges. --------- */

   /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */

   /* This is relatively simple, because (1) we only seek the complete
      end-to-end live range of each vreg, and are not interested in
      any holes in it, and (2) the vregs are conveniently numbered 0
      .. n_vregs-1, so we can just dump the results in a
      pre-allocated array. */

   vreg_lrs = NULL;
   if (n_vregs > 0)
      vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);

   for (j = 0; j < n_vregs; j++) {
      vreg_lrs[j].live_after     = INVALID_INSTRNO;
      vreg_lrs[j].dead_before    = INVALID_INSTRNO;
      vreg_lrs[j].spill_offset   = 0;
      vreg_lrs[j].spill_size     = 0;
      vreg_lrs[j].reg_class      = HRcINVALID;
   }

   /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */

   /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */

   /* This is more complex than Stage 1, because we need to compute
      exactly all the live ranges of all the allocatable real regs,
      and we don't know in advance how many there will be. */

   rreg_lrs_used = 0;
   rreg_lrs_size = 4;
   rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
   rreg_lrs_db = NULL; /* we'll create this later */

   /* We'll need to track live range start/end points separately for
      each rreg.  Sigh. */
   vassert(n_available_real_regs > 0);
   rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
   rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));

   for (j = 0; j < n_available_real_regs; j++) {
      rreg_live_after[j] =
      rreg_dead_before[j] = INVALID_INSTRNO;
   }

   /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */

   /* ------ start of ITERATE OVER INSNS ------ */

   for (ii = 0; ii < instrs_in->arr_used; ii++) {

      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );

#     if 0
      vex_printf("\n%d  stage1: ", ii);
      (*ppInstr)(instrs_in->arr[ii], mode64);
      vex_printf("\n");
      ppHRegUsage(&reg_usage);
#     endif

      /* ------ start of DEAL WITH VREG LIVE RANGES ------ */

      /* for each reg mentioned in the insn ... */
      for (j = 0; j < reg_usage.n_used; j++) {

         vreg = reg_usage.hreg[j];
         /* only interested in virtual registers right now. */
         if (!hregIsVirtual(vreg))
            continue;
         k = hregNumber(vreg);
         if (k < 0 || k >= n_vregs) {
            vex_printf("\n");
            (*ppInstr)(instrs_in->arr[ii], mode64);
            vex_printf("\n");
            vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
            vpanic("doRegisterAllocation: out-of-range vreg");
         }

         /* Take the opportunity to note its regclass.  We'll need
            that when allocating spill slots. */
         if (vreg_lrs[k].reg_class == HRcINVALID) {
            /* First mention of this vreg. */
            vreg_lrs[k].reg_class = hregClass(vreg);
         } else {
            /* Seen it before, so check for consistency. */
            vassert(vreg_lrs[k].reg_class == hregClass(vreg));
         }

         /* Now consider live ranges. */
         switch (reg_usage.mode[j]) {
            case HRmRead:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Read");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmWrite:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO)
                  vreg_lrs[k].live_after = toShort(ii);
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmModify:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Modify");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            default:
               vpanic("doRegisterAllocation(1)");
         } /* switch */

      } /* iterate over registers */

      /* ------ end of DEAL WITH VREG LIVE RANGES ------ */

      /* ------ start of DEAL WITH RREG LIVE RANGES ------ */

      /* for each reg mentioned in the insn ... */
      for (j = 0; j < reg_usage.n_used; j++) {

         /* Dummy initialisations of flush_la and flush_db to avoid
            possible bogus uninit-var warnings from gcc. */
         Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
         Bool flush;

         rreg = reg_usage.hreg[j];

         /* only interested in real registers right now. */
         if (hregIsVirtual(rreg))
            continue;

         /* Furthermore, we're not interested in this rreg unless it's
            one of the allocatable ones.  For example, it could be a
            stack pointer register, or some other register beyond our
            control, in which case we should just ignore it. */
         for (k = 0; k < n_available_real_regs; k++)
            if (available_real_regs[k] == rreg)
               break;
         if (k == n_available_real_regs)
            continue; /* not found -- ignore. */
         flush = False;
         switch (reg_usage.mode[j]) {
            case HRmWrite:
               flush_la = rreg_live_after[k];
               flush_db = rreg_dead_before[k];
               if (flush_la != INVALID_INSTRNO
                   && flush_db != INVALID_INSTRNO)
                  flush = True;
               rreg_live_after[k]  = ii;
               rreg_dead_before[k] = ii+1;
               break;
            case HRmRead:
               if (rreg_live_after[k] == INVALID_INSTRNO) {
                  vex_printf("\nOFFENDING RREG = ");
                  (*ppReg)(available_real_regs[k]);
                  vex_printf("\n");
                  vex_printf("\nOFFENDING instr = ");
                  (*ppInstr)(instrs_in->arr[ii], mode64);
                  vex_printf("\n");
                  vpanic("doRegisterAllocation: "
                         "first event for rreg is Read");
               }
               rreg_dead_before[k] = ii+1;
               break;
            case HRmModify:
               if (rreg_live_after[k] == INVALID_INSTRNO) {
                  vex_printf("\nOFFENDING RREG = ");
                  (*ppReg)(available_real_regs[k]);
                  vex_printf("\n");
                  vex_printf("\nOFFENDING instr = ");
                  (*ppInstr)(instrs_in->arr[ii], mode64);
                  vex_printf("\n");
                  vpanic("doRegisterAllocation: "
                         "first event for rreg is Modify");
               }
               rreg_dead_before[k] = ii+1;
               break;
            default:
               vpanic("doRegisterAllocation(2)");
         }

         if (flush) {
            vassert(flush_la != INVALID_INSTRNO);
            vassert(flush_db != INVALID_INSTRNO);
            ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
            if (0)
               vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
            rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
            rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
            rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
            rreg_lrs_used++;
         }

      } /* iterate over regs in the instr */

      /* ------ end of DEAL WITH RREG LIVE RANGES ------ */

   } /* iterate over insns */

   /* ------ end of ITERATE OVER INSNS ------ */

   /* ------ start of FINALISE RREG LIVE RANGES ------ */

   /* Now finish up any live ranges left over. */
   for (j = 0; j < n_available_real_regs; j++) {

#     if 0
      vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
                                             rreg_dead_before[j]);
#     endif
      vassert( (rreg_live_after[j] == INVALID_INSTRNO
                && rreg_dead_before[j] == INVALID_INSTRNO)
               ||
               (rreg_live_after[j] != INVALID_INSTRNO
                && rreg_dead_before[j] != INVALID_INSTRNO)
             );

      if (rreg_live_after[j] == INVALID_INSTRNO)
         continue;

      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
      if (0)
         vex_printf("FLUSH 2 (%d,%d)\n",
                    rreg_live_after[j], rreg_dead_before[j]);
      rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
      rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
      rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
      rreg_lrs_used++;
   }

   /* Compute summary hints for choosing real regs.  If a real reg is
      involved in a hard live range, record that fact in the fixed
      part of the running rreg_state.  Later, when offered a choice between
      rregs, it's better to choose one which is not marked as having
      any HLRs, since ones with HLRs may need to be spilled around
      their HLRs.  Correctness of final assignment is unaffected by
      this mechanism -- it is only an optimisation. */

   for (j = 0; j < rreg_lrs_used; j++) {
      rreg = rreg_lrs_la[j].rreg;
      vassert(!hregIsVirtual(rreg));
      /* rreg is involved in a HLR.  Record this info in the array, if
         there is space. */
      for (k = 0; k < n_rregs; k++)
         if (rreg_state[k].rreg == rreg)
            break;
      vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
      rreg_state[k].has_hlrs = True;
   }
   if (0) {
      for (j = 0; j < n_rregs; j++) {
         if (!rreg_state[j].has_hlrs)
            continue;
         ppReg(rreg_state[j].rreg);
         vex_printf(" hinted\n");
      }
   }

   /* Finally, copy the _la variant into the _db variant and
      sort both by their respective fields. */
   rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
   for (j = 0; j < rreg_lrs_used; j++)
      rreg_lrs_db[j] = rreg_lrs_la[j];

   sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
   sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );

   /* And set up the cursors. */
   rreg_lrs_la_next = 0;
   rreg_lrs_db_next = 0;

   for (j = 1; j < rreg_lrs_used; j++) {
      vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
      vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
   }

   /* ------ end of FINALISE RREG LIVE RANGES ------ */

#  if DEBUG_REGALLOC
   for (j = 0; j < n_vregs; j++) {
      vex_printf("vreg %d:  la = %d,  db = %d\n",
                 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
   }
#  endif

#  if DEBUG_REGALLOC
   vex_printf("RRegLRs by LA:\n");
   for (j = 0; j < rreg_lrs_used; j++) {
      vex_printf("  ");
      (*ppReg)(rreg_lrs_la[j].rreg);
      vex_printf("      la = %d,  db = %d\n",
                 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
   }
   vex_printf("RRegLRs by DB:\n");
   for (j = 0; j < rreg_lrs_used; j++) {
      vex_printf("  ");
      (*ppReg)(rreg_lrs_db[j].rreg);
      vex_printf("      la = %d,  db = %d\n",
                 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
   }
#  endif

   /* --------- Stage 3: allocate spill slots. --------- */

   /* Each spill slot is 8 bytes long.  For vregs which take more than
      64 bits to spill (classes Flt64 and Vec128), we have to allocate
      two consecutive spill slots.  For 256 bit registers (class
      Vec256), we have to allocate four consecutive spill slots.

      For Vec128-class on PowerPC, the spill slot's actual address
      must be 16-byte aligned.  Since the spill slot's address is
      computed as an offset from the guest state pointer, and since
      the user of the generated code must set that pointer to a
      32-aligned value, we have the residual obligation here of
      choosing a 16-aligned spill slot offset for Vec128-class values.
      Since each spill slot is 8 bytes long, that means for
      Vec128-class values we must allocate a spill slot number which
      is zero mod 2.

      Similarly, for Vec256 class on amd64, find a spill slot number
      which is zero mod 4.  This guarantees it will be 32 byte
      aligned, which isn't actually necessary on amd64 (we use movUpd
      etc to spill), but seems like good practice.

      Do a rank-based allocation of vregs to spill slot numbers.  We
      put as few values as possible in spill slots, but nevertheless
      need to have a spill slot available for all vregs, just in case.
   */
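   /* Worked example: guest_sizeB is a multiple of 32 (asserted above),
      so the spill area starts at the 32-aligned offset guest_sizeB * 3
      (see the baseBlock layout note further down).  An Int-class vreg
      given slot k lands at offset guest_sizeB*3 + k*8, which is
      8-aligned; a Vec128/Flt64 vreg given the even:odd pair (k, k+1)
      with k even lands at a 16-aligned offset.
      sanity_check_spill_offset() re-checks this for each vreg. */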
   /* max_ss_no = -1; */

   for (j = 0; j < N_SPILL64S; j++)
      ss_busy_until_before[j] = 0;

   for (j = 0; j < n_vregs; j++) {

      /* True iff this vreg is unused.  In which case we also expect
         that the reg_class field for it has not been set.  */
      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
         vassert(vreg_lrs[j].reg_class == HRcINVALID);
         continue;
      }

      /* The spill slots are 64 bits in size.  As per the comment on
         the definition of HRegClass in host_generic_regs.h, that means,
         to spill a vreg of class Flt64 or Vec128, we'll need to find
         two adjacent spill slots to use.  For Vec256, we'll need to
         find four adjacent slots to use.  Note, this logic needs to be
         kept in sync with the size info on the definition of
         HRegClass. */
      switch (vreg_lrs[j].reg_class) {

         case HRcVec128: case HRcFlt64:
            /* Find two adjacent free slots which between them
               provide up to 128 bits in which to spill the vreg.
               Since we are trying to find an even:odd pair, move
               along in steps of 2 (slots). */
            for (k = 0; k < N_SPILL64S-1; k += 2)
               if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
                   && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
                  break;
            if (k >= N_SPILL64S-1) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            if (0) vex_printf("16-byte spill offset in spill slot %d\n",
                              (Int)k);
            ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
            ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
            break;

         default:
            /* The ordinary case -- just find a single spill slot. */
            /* Find the lowest-numbered spill slot which is available
               at the start point of this interval, and assign the
               interval to it. */
            for (k = 0; k < N_SPILL64S; k++)
               if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
                  break;
            if (k == N_SPILL64S) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            ss_busy_until_before[k] = vreg_lrs[j].dead_before;
            break;

      } /* switch (vreg_lrs[j].reg_class) { */

      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
         layout: the guest state, then two equal sized areas following
         it for two sets of shadow state, and then the spill area. */
      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);

      /* Independent check that we've made a sane choice of slot */
      sanity_check_spill_offset( &vreg_lrs[j] );
      /* if (j > max_ss_no) */
      /*    max_ss_no = j; */
   }

#  if 0
   vex_printf("\n\n");
   for (j = 0; j < n_vregs; j++)
      vex_printf("vreg %d    --> spill offset %d\n",
                 j, vreg_lrs[j].spill_offset);
#  endif

   /* --------- Stage 4: establish rreg preferences --------- */

   /* It may be advantageous to allocate certain vregs to specific
      rregs, as a way of avoiding reg-reg moves later.  Here we
      establish which, if any, rreg each vreg would prefer to be in.
      Note that this constrains the allocator -- ideally we end up
      with as few as possible vregs expressing a preference.

      This is an optimisation: if the .preferred_rreg field is never
      set to anything different from INVALID_HREG, the allocator still
      works. */

   /* 30 Dec 04: removed this mechanism as it does not seem to
      help. */

   /* --------- Stage 5: process instructions --------- */

   /* This is the main loop of the allocator.  First, we need to
      correctly set up our running state, which tracks the status of
      each real register. */

   /* ------ BEGIN: Process each insn in turn. ------ */

   for (ii = 0; ii < instrs_in->arr_used; ii++) {

#     if DEBUG_REGALLOC
      vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
      vex_printf("---- ");
      (*ppInstr)(instrs_in->arr[ii], mode64);
      vex_printf("\n\nInitial state:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif

      /* ------------ Sanity checks ------------ */

      /* Sanity checks are expensive.  So they are done only once
         every 7 instructions, and just before the last
         instruction. */
      do_sanity_check
         = toBool(
              False  /* Set to True for sanity checking of all insns. */
              || ii == instrs_in->arr_used-1
              || (ii > 0 && (ii % 7) == 0)
           );

      if (do_sanity_check) {

         /* Sanity check 1: all rregs with a hard live range crossing
            this insn must be marked as unavailable in the running
            state. */
         for (j = 0; j < rreg_lrs_used; j++) {
            if (rreg_lrs_la[j].live_after < ii
                && ii < rreg_lrs_la[j].dead_before) {
               /* ii is the middle of a hard live range for some real
                  reg.  Check it's marked as such in the running
                  state. */

#              if 0
               vex_printf("considering la %d .. db %d   reg = ",
                          rreg_lrs[j].live_after,
                          rreg_lrs[j].dead_before);
               (*ppReg)(rreg_lrs[j].rreg);
               vex_printf("\n");
#              endif

               /* find the state entry for this rreg */
               for (k = 0; k < n_rregs; k++)
                  if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
                     break;

               /* and assert that this rreg is marked as unavailable */
               vassert(rreg_state[k].disp == Unavail);
            }
         }

         /* Sanity check 2: conversely, all rregs marked as
            unavailable in the running rreg_state must have a
            corresponding hard live range entry in the rreg_lrs
            array. */
         for (j = 0; j < n_available_real_regs; j++) {
            vassert(rreg_state[j].disp == Bound
                    || rreg_state[j].disp == Free
                    || rreg_state[j].disp == Unavail);
            if (rreg_state[j].disp != Unavail)
               continue;
            for (k = 0; k < rreg_lrs_used; k++)
               if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
                   && rreg_lrs_la[k].live_after < ii
                   && ii < rreg_lrs_la[k].dead_before)
                  break;
            /* If this vassertion fails, we couldn't find a
               corresponding HLR. */
            vassert(k < rreg_lrs_used);
         }

         /* Sanity check 3: all vreg-rreg bindings must bind registers
            of the same class. */
         for (j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound) {
               vassert(rreg_state[j].eq_spill_slot == False);
               continue;
            }
            vassert(hregClass(rreg_state[j].rreg)
                    == hregClass(rreg_state[j].vreg));
            vassert( hregIsVirtual(rreg_state[j].vreg));
            vassert(!hregIsVirtual(rreg_state[j].rreg));
         }

         /* Sanity check 4: the vreg_state and rreg_state
            mutually-redundant mappings are consistent.  If
            rreg_state[j].vreg points at some vreg_state entry then
            that vreg_state entry should point back at
            rreg_state[j]. */
         for (j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound)
               continue;
            k = hregNumber(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(k));
            vassert(vreg_state[k] == j);
         }
         for (j = 0; j < n_vregs; j++) {
            k = vreg_state[j];
            if (k == INVALID_RREG_NO)
               continue;
            vassert(IS_VALID_RREGNO(k));
            vassert(rreg_state[k].disp == Bound);
            vassert(hregNumber(rreg_state[k].vreg) == j);
         }

      } /* if (do_sanity_check) */

      /* ------------ end of Sanity checks ------------ */

      /* Do various optimisations pertaining to register coalescing
         and preferencing:
            MOV  v <-> v   coalescing (done here).
            MOV  v <-> r   coalescing (not yet, if ever)
      */
      /* If doing a reg-reg move between two vregs, and the src's live
         range ends here and the dst's live range starts here, bind
         the dst to the src's rreg, and that's all. */
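      /* Example: "mov %vS, %vD" at insn ii, where vS dies here
         (dead_before == ii+1) and vD is born here (live_after == ii).
         We simply rebind vS's rreg to vD and do not emit the move at
         all -- note the 'continue' below skips EMIT_INSTR for this
         insn. */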
      if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
         /* Check that *isMove is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         k = hregNumber(vregS);
         m = hregNumber(vregD);
         vassert(IS_VALID_VREGNO(k));
         vassert(IS_VALID_VREGNO(m));
         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
#        if DEBUG_REGALLOC
         vex_printf("COALESCE ");
         (*ppReg)(vregS);
         vex_printf(" -> ");
         (*ppReg)(vregD);
         vex_printf("\n\n");
#        endif
         /* Find the state entry for vregS. */
         for (m = 0; m < n_rregs; m++)
            if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
               break;
         if (m == n_rregs)
            /* We failed to find a binding for vregS, which means it's
               currently not in a register.  So we can't do the
               coalescing.  Give up. */
            goto cannot_coalesce;

         /* Finally, we can do the coalescing.  It's trivial -- merely
            claim vregS's register for vregD. */
         rreg_state[m].vreg = vregD;
         vassert(IS_VALID_VREGNO(hregNumber(vregD)));
         vassert(IS_VALID_VREGNO(hregNumber(vregS)));
         vreg_state[hregNumber(vregD)] = toShort(m);
         vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;

         /* This rreg has become associated with a different vreg and
            hence with a different spill slot.  Play safe. */
         rreg_state[m].eq_spill_slot = False;

         /* Move on to the next insn.  We skip the post-insn stuff for
            fixed registers, since this move should not interact with
            them in any way. */
         continue;
      }
     cannot_coalesce:

      /* ------ Free up rregs bound to dead vregs ------ */

      /* Look for vregs whose live range has just ended, and
         mark the associated rreg as free. */

      for (j = 0; j < n_rregs; j++) {
         if (rreg_state[j].disp != Bound)
            continue;
         vreg = hregNumber(rreg_state[j].vreg);
         vassert(IS_VALID_VREGNO(vreg));
         if (vreg_lrs[vreg].dead_before <= ii) {
            rreg_state[j].disp = Free;
            rreg_state[j].eq_spill_slot = False;
            m = hregNumber(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (DEBUG_REGALLOC) {
               vex_printf("free up ");
               (*ppReg)(rreg_state[j].rreg);
               vex_printf("\n");
            }
         }
      }

      /* ------ Pre-instruction actions for fixed rreg uses ------ */

      /* Now we have to deal with rregs which are about to be made
         live by this instruction -- in other words, are entering into
         one of their live ranges.  If any such rreg holds a vreg, we
         will have to free up the rreg.  The simplest solution which
         is correct is to spill the rreg.

         Note we could do better:
         * Could move it into some other free rreg, if one is available

         Do this efficiently, by incrementally stepping along an array
         of rreg HLRs that are known to be sorted by start point
         (their .live_after field).
      */
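      /* For example, if this insn has a fixed-register operand (say, a
         helper call that clobbers a particular rreg, or an insn whose
         encoding demands a specific rreg), that rreg's hard live range
         begins here.  Any vreg currently bound to it gets spilled to
         its slot first (unless eq_spill_slot says the slot is already
         up to date), and the rreg is marked Unavail until the hard
         live range ends. */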
      while (True) {
         vassert(rreg_lrs_la_next >= 0);
         vassert(rreg_lrs_la_next <= rreg_lrs_used);
         if (rreg_lrs_la_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
            break; /* next live range does not yet start */
         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
            Find the associated rreg_state entry. */
         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
            Real register live ranges are guaranteed to be well-formed
            in that they start with a write to the register -- Stage 2
            rejects any code not satisfying this.  So the correct
            question to ask is whether
            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
            whether the reg becomes live after this insn -- rather
            than before it. */
#        if DEBUG_REGALLOC
         vex_printf("need to free up rreg: ");
         (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
         vex_printf("\n\n");
#        endif
         for (k = 0; k < n_rregs; k++)
            if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
               break;
         /* If this fails, we don't have an entry for this rreg.
            Which we should. */
         vassert(IS_VALID_RREGNO(k));
         m = hregNumber(rreg_state[k].vreg);
         if (rreg_state[k].disp == Bound) {
            /* Yes, there is an associated vreg.  Spill it if it's
               still live. */
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (vreg_lrs[m].dead_before > ii) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
                  HInstr* spill1 = NULL;
                  HInstr* spill2 = NULL;
                  (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
                               vreg_lrs[m].spill_offset, mode64 );
                  vassert(spill1 || spill2); /* can't both be NULL */
                  if (spill1)
                     EMIT_INSTR(spill1);
                  if (spill2)
                     EMIT_INSTR(spill2);
               }
               rreg_state[k].eq_spill_slot = True;
            }
         }
         rreg_state[k].disp = Unavail;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs entering HLRs at this point */
         rreg_lrs_la_next++;
      }


#     if DEBUG_REGALLOC
      vex_printf("After pre-insn actions for fixed regs:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif


      /* ------ Deal with the current instruction. ------ */

      /* Finally we can begin the processing of this instruction
         itself.  The aim is to free up enough rregs for this insn.
         This may generate spill stores since we may have to evict
         some vregs currently in rregs.  Also generates spill loads.
         We also build up the final vreg->rreg mapping to be applied
         to the insn. */

      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );

      initHRegRemap(&remap);

      /* ------------ BEGIN directReload optimisation ----------- */

      /* If the instruction reads exactly one vreg which is currently
         in a spill slot, and this is the last use of that vreg, see if
         we can convert the instruction into one which reads directly
         from the spill slot.  This is clearly only possible for x86
         and amd64 targets, since ppc and arm are load-store
         architectures.  If successful, replace instrs_in->arr[ii]
         with this new instruction, and recompute its reg usage, so
         that the change is invisible to the standard-case handling
         that follows. */
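      /* Illustrative case (hypothetical mnemonics): for a two-operand
         op "add %vS, %vD" where vS is currently spilled and this is
         vS's final use, directReload may rewrite the insn to read the
         operand straight from vS's spill slot (an offset from the
         guest state pointer), so no rreg and no separate reload
         instruction are needed for vS. */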

      if (directReload && reg_usage.n_used <= 2) {
         Bool  debug_direct_reload = True && False;
         HReg  cand     = INVALID_HREG;
         Int   nreads   = 0;
         Short spilloff = 0;

         for (j = 0; j < reg_usage.n_used; j++) {

            vreg = reg_usage.hreg[j];

            if (!hregIsVirtual(vreg))
               continue;

            if (reg_usage.mode[j] == HRmRead) {
               nreads++;
               m = hregNumber(vreg);
               vassert(IS_VALID_VREGNO(m));
               k = vreg_state[m];
               if (!IS_VALID_RREGNO(k)) {
                  /* ok, it is spilled.  Now, is this its last use? */
                  vassert(vreg_lrs[m].dead_before >= ii+1);
                  if (vreg_lrs[m].dead_before == ii+1
                      && cand == INVALID_HREG) {
                     spilloff = vreg_lrs[m].spill_offset;
                     cand = vreg;
                  }
               }
            }
         }

         if (nreads == 1 && cand != INVALID_HREG) {
            HInstr* reloaded;
            if (reg_usage.n_used == 2)
               vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);

            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
            if (debug_direct_reload && !reloaded) {
               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
               ppInstr(instrs_in->arr[ii], mode64);
            }
            if (reloaded) {
               /* Update info about the insn, so it looks as if it had
                  been in this form all along. */
               instrs_in->arr[ii] = reloaded;
               (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
               if (debug_direct_reload && !reloaded) {
                  vex_printf("  -->  ");
                  ppInstr(reloaded, mode64);
               }
            }

            if (debug_direct_reload && !reloaded)
               vex_printf("\n");
         }

      }

      /* ------------ END directReload optimisation ------------ */

      /* for each reg mentioned in the insn ... */
      for (j = 0; j < reg_usage.n_used; j++) {

         vreg = reg_usage.hreg[j];

         /* only interested in virtual registers right now. */
         if (!hregIsVirtual(vreg))
            continue;

#        if 0
         vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
#        endif

         /* Now we're trying to find a rreg for "vreg".  First of all,
            if it already has an rreg assigned, we don't need to do
            anything more.  Search the current state to find out. */
         m = hregNumber(vreg);
         vassert(IS_VALID_VREGNO(m));
         k = vreg_state[m];
         if (IS_VALID_RREGNO(k)) {
            vassert(rreg_state[k].disp == Bound);
            addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
            /* If this rreg is written or modified, mark it as different
               from any spill slot value. */
            if (reg_usage.mode[j] != HRmRead)
               rreg_state[k].eq_spill_slot = False;
            continue;
         } else {
            vassert(k == INVALID_RREG_NO);
         }

         /* No luck.  The next thing to do is see if there is a
            currently free rreg available, of the correct class.  If
            so, bag it.  NOTE, we could improve this by selecting an
            rreg for which the next live-range event is as far ahead
            as possible. */
         k_suboptimal = -1;
         for (k = 0; k < n_rregs; k++) {
            if (rreg_state[k].disp != Free
                || hregClass(rreg_state[k].rreg) != hregClass(vreg))
               continue;
            if (rreg_state[k].has_hlrs) {
               /* Well, at least we can use k_suboptimal if we really
                  have to.  Keep on looking for a better candidate. */
               k_suboptimal = k;
            } else {
               /* Found a preferable reg.  Use it. */
               k_suboptimal = -1;
               break;
            }
         }
         if (k_suboptimal >= 0)
            k = k_suboptimal;

         if (k < n_rregs) {
            rreg_state[k].disp = Bound;
            rreg_state[k].vreg = vreg;
            m = hregNumber(vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = toShort(k);
            addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
            /* Generate a reload if needed.  This only creates needed
               reloads because the live range builder for vregs will
               guarantee that the first event for a vreg is a write.
               Hence, if this reference is not a write, it cannot be
               the first reference for this vreg, and so a reload is
               indeed needed. */
            if (reg_usage.mode[j] != HRmWrite) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               HInstr* reload1 = NULL;
               HInstr* reload2 = NULL;
               (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
                             vreg_lrs[m].spill_offset, mode64 );
               vassert(reload1 || reload2); /* can't both be NULL */
               if (reload1)
                  EMIT_INSTR(reload1);
               if (reload2)
                  EMIT_INSTR(reload2);
               /* This rreg is read or modified by the instruction.
                  If it's merely read we can claim it now equals the
                  spill slot, but not so if it is modified. */
               if (reg_usage.mode[j] == HRmRead) {
                  rreg_state[k].eq_spill_slot = True;
               } else {
                  vassert(reg_usage.mode[j] == HRmModify);
                  rreg_state[k].eq_spill_slot = False;
               }
            } else {
               rreg_state[k].eq_spill_slot = False;
            }

            continue;
         }

         /* Well, now we have no option but to spill a vreg.  It's
            important to make a good choice of vreg to spill, and of
            course we need to be careful not to spill a vreg which is
            needed by this insn. */

         /* First, mark in the rreg_state, those rregs which are not spill
            candidates, due to holding a vreg mentioned by this
            instruction.  Or being of the wrong class. */
         for (k = 0; k < n_rregs; k++) {
            rreg_state[k].is_spill_cand = False;
            if (rreg_state[k].disp != Bound)
               continue;
            if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
               continue;
            rreg_state[k].is_spill_cand = True;
            for (m = 0; m < reg_usage.n_used; m++) {
               if (rreg_state[k].vreg == reg_usage.hreg[m]) {
                  rreg_state[k].is_spill_cand = False;
                  break;
               }
            }
         }

         /* We can choose to spill any rreg satisfying
            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
            the next use of its associated vreg is as far ahead as
            possible, in the hope that this will minimise the number
            of consequent reloads required. */
         spillee
            = findMostDistantlyMentionedVReg (
                 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );

         if (spillee == -1) {
            /* Hmmmmm.  There don't appear to be any spill candidates.
               We're hosed. */
            vex_printf("reg_alloc: can't find a register in class: ");
            ppHRegClass(hregClass(vreg));
            vex_printf("\n");
            vpanic("reg_alloc: can't create a free register.");
         }

         /* Right.  So we're going to spill rreg_state[spillee]. */
         vassert(IS_VALID_RREGNO(spillee));
         vassert(rreg_state[spillee].disp == Bound);
         /* check it's the right class */
         vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
         /* check we're not ejecting the vreg for which we are trying
            to free up a register. */
         vassert(rreg_state[spillee].vreg != vreg);

         m = hregNumber(rreg_state[spillee].vreg);
         vassert(IS_VALID_VREGNO(m));

         /* So here's the spill store.  Assert that we're spilling a
            live vreg. */
         vassert(vreg_lrs[m].dead_before > ii);
         vassert(vreg_lrs[m].reg_class != HRcINVALID);
         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
            HInstr* spill1 = NULL;
            HInstr* spill2 = NULL;
            (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
                         vreg_lrs[m].spill_offset, mode64 );
            vassert(spill1 || spill2); /* can't both be NULL */
            if (spill1)
               EMIT_INSTR(spill1);
            if (spill2)
               EMIT_INSTR(spill2);
         }

         /* Update the rreg_state to reflect the new assignment for this
            rreg. */
         rreg_state[spillee].vreg = vreg;
         vreg_state[m] = INVALID_RREG_NO;

         rreg_state[spillee].eq_spill_slot = False; /* be safe */

         m = hregNumber(vreg);
         vassert(IS_VALID_VREGNO(m));
         vreg_state[m] = toShort(spillee);

         /* Now, if this vreg is being read or modified (as opposed to
            written), we have to generate a reload for it. */
         if (reg_usage.mode[j] != HRmWrite) {
            vassert(vreg_lrs[m].reg_class != HRcINVALID);
            HInstr* reload1 = NULL;
            HInstr* reload2 = NULL;
            (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
                          vreg_lrs[m].spill_offset, mode64 );
            vassert(reload1 || reload2); /* can't both be NULL */
            if (reload1)
               EMIT_INSTR(reload1);
            if (reload2)
               EMIT_INSTR(reload2);
            /* This rreg is read or modified by the instruction.
               If it's merely read we can claim it now equals the
               spill slot, but not so if it is modified. */
            if (reg_usage.mode[j] == HRmRead) {
               rreg_state[spillee].eq_spill_slot = True;
            } else {
               vassert(reg_usage.mode[j] == HRmModify);
               rreg_state[spillee].eq_spill_slot = False;
            }
         }

         /* So after much twisting and turning, we have vreg mapped to
            rreg_state[spillee].rreg.  Note that in the map. */
         addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);

      } /* iterate over registers in this instruction. */

      /* We've finished clowning around with registers in this instruction.
         Three results:
         - the running rreg_state[] has been updated
         - a suitable vreg->rreg mapping for this instruction has been
           constructed
         - spill and reload instructions may have been emitted.

        The final step is to apply the mapping to the instruction,
        and emit that.
      */

      /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
      (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
      EMIT_INSTR( instrs_in->arr[ii] );

#     if DEBUG_REGALLOC
      vex_printf("After dealing with current insn:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif

      /* ------ Post-instruction actions for fixed rreg uses ------ */

      /* Now we need to check for rregs exiting fixed live ranges
         after this instruction, and if so mark them as free. */
      while (True) {
         vassert(rreg_lrs_db_next >= 0);
         vassert(rreg_lrs_db_next <= rreg_lrs_used);
         if (rreg_lrs_db_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
            break; /* next live range has not yet ended */
         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
            range.  Mark it as such in the main rreg_state array. */
         for (k = 0; k < n_rregs; k++)
            if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
               break;
         /* If this vassertion fails, we don't have an entry for
            this rreg.  Which we should. */
         vassert(k < n_rregs);
         vassert(rreg_state[k].disp == Unavail);
         rreg_state[k].disp = Free;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs leaving HLRs at this point */
         rreg_lrs_db_next++;
      }

#     if DEBUG_REGALLOC
      vex_printf("After post-insn actions for fixed regs:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif

   } /* iterate over insns */

   /* ------ END: Process each insn in turn. ------ */

   /* free(rreg_state); */
   /* free(rreg_lrs); */
   /* if (vreg_lrs) free(vreg_lrs); */

   /* Paranoia */
   for (j = 0; j < n_rregs; j++)
      vassert(rreg_state[j].rreg == available_real_regs[j]);

   vassert(rreg_lrs_la_next == rreg_lrs_used);
   vassert(rreg_lrs_db_next == rreg_lrs_used);

   return instrs_out;

#  undef INVALID_INSTRNO
#  undef EMIT_INSTR
#  undef PRINT_STATE
}



/*---------------------------------------------------------------*/
/*---                                       host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/