1
2/*---------------------------------------------------------------*/
3/*--- begin                                 host_reg_alloc2.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2011 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex.h"
38
39#include "main_util.h"
40#include "host_generic_regs.h"
41
42/* Set to 1 for lots of debugging output. */
43#define DEBUG_REGALLOC 0
44
45
46/* TODO 27 Oct 04:
47
48   Better consistency checking from what isMove tells us.
49
50   We can possibly do V-V coalescing even when the src is spilled,
51   providing we can arrange for the dst to have the same spill slot.
52
53   Note that state[].hreg is the same as the available real regs.
54
55   Generally rationalise data structures.  */
56
57
58/* Records information on virtual register live ranges.  Computed once
59   and remains unchanged after that. */
60typedef
61   struct {
62      /* Becomes live for the first time after this insn ... */
63      Short live_after;
64      /* Becomes dead for the last time before this insn ... */
65      Short dead_before;
66      /* The "home" spill slot, if needed.  Never changes. */
67      Short spill_offset;
68      Short spill_size;
69      /* What kind of register this is. */
70      HRegClass reg_class;
71   }
72   VRegLR;
73
74
75/* Records information on real-register live ranges.  Computed once
76   and remains unchanged after that. */
77typedef
78   struct {
79      HReg rreg;
80      /* Becomes live after this insn ... */
81      Short live_after;
82      /* Becomes dead before this insn ... */
83      Short dead_before;
84   }
85   RRegLR;
86
87
88/* An array of the following structs (rreg_state) comprises the
89   running state of the allocator.  It indicates what the current
90   disposition of each allocatable real register is.  The array gets
91   updated as the allocator processes instructions. */
92typedef
93   struct {
94      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
95      /* Which rreg is this for? */
96      HReg rreg;
97      /* Is this involved in any HLRs?  (only an optimisation hint) */
98      Bool has_hlrs;
99      /* ------ FIELDS WHICH DO CHANGE ------ */
100      /* 6 May 07: rearranged fields below so the whole struct fits
101         into 16 bytes on both x86 and amd64. */
102      /* Used when .disp == Bound and we are looking for vregs to
103         spill. */
104      Bool is_spill_cand;
105      /* Optimisation: used when .disp == Bound.  Indicates when the
106         rreg has the same value as the spill slot for the associated
107         vreg.  Is safely left at False, and becomes True after a
108         spill store or reload for this rreg. */
109      Bool eq_spill_slot;
110      /* What's it's current disposition? */
111      enum { Free,     /* available for use */
112             Unavail,  /* in a real-reg live range */
113             Bound     /* in use (holding value of some vreg) */
114           }
115           disp;
116      /* If .disp == Bound, what vreg is it bound to? */
117      HReg vreg;
118   }
119   RRegState;
120
121
122/* The allocator also maintains a redundant array of indexes
123   (vreg_state) from vreg numbers back to entries in rreg_state.  It
124   is redundant because iff vreg_state[i] == j then
125   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
126   point at each other.  The purpose of this is to speed up activities
127   which involve looking for a particular vreg: there is no need to
128   scan the rreg_state looking for it, just index directly into
129   vreg_state.  The FAQ "does this vreg already have an associated
130   rreg" is the main beneficiary.
131
132   To indicate, in vreg_state[i], that a given vreg is not currently
133   associated with any rreg, that entry can be set to INVALID_RREG_NO.
134
135   Because the vreg_state entries are signed Shorts, the max number
136   of vregs that can be handed by regalloc is 32767.
137*/
138
139#define INVALID_RREG_NO ((Short)(-1))
140
141#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
142#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143
144
145/* Does this instruction mention a particular reg? */
146static Bool instrMentionsReg (
147   void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
148   HInstr* instr,
149   HReg r,
150   Bool mode64
151)
152{
153   Int       i;
154   HRegUsage reg_usage;
155   (*getRegUsage)(&reg_usage, instr, mode64);
156   for (i = 0; i < reg_usage.n_used; i++)
157      if (reg_usage.hreg[i] == r)
158         return True;
159   return False;
160}
161
162
163/* Search forward from some given point in the incoming instruction
164   sequence.  Point is to select a virtual register to spill, by
165   finding the vreg which is mentioned as far ahead as possible, in
166   the hope that this will minimise the number of consequent reloads.
167
168   Only do the search for vregs which are Bound in the running state,
169   and for which the .is_spill_cand field is set.  This allows the
170   caller to arbitrarily restrict the set of spill candidates to be
171   considered.
172
173   Returns an index into the state array indicating the (v,r) pair to
174   spill, or -1 if none was found.  */
175static
176Int findMostDistantlyMentionedVReg (
177   void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
178   HInstrArray* instrs_in,
179   Int          search_from_instr,
180   RRegState*   state,
181   Int          n_state,
182   Bool         mode64
183)
184{
185   Int k, m;
186   Int furthest_k = -1;
187   Int furthest   = -1;
188   vassert(search_from_instr >= 0);
189   for (k = 0; k < n_state; k++) {
190      if (!state[k].is_spill_cand)
191         continue;
192      vassert(state[k].disp == Bound);
193      for (m = search_from_instr; m < instrs_in->arr_used; m++) {
194         if (instrMentionsReg(getRegUsage,
195                              instrs_in->arr[m], state[k].vreg, mode64))
196            break;
197      }
198      if (m > furthest) {
199         furthest   = m;
200         furthest_k = k;
201      }
202   }
203   return furthest_k;
204}
205
206
207/* Check that this vreg has been assigned a sane spill offset. */
208static inline void sanity_check_spill_offset ( VRegLR* vreg )
209{
210   if (vreg->reg_class == HRcVec128 || vreg->reg_class == HRcFlt64) {
211      vassert(0 == ((UShort)vreg->spill_offset % 16));
212   } else {
213      vassert(0 == ((UShort)vreg->spill_offset % 8));
214   }
215}
216
217
218/* Double the size of the real-reg live-range array, if needed. */
219static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
220{
221   Int     k;
222   RRegLR* arr2;
223   if (used < *size) return;
224   if (0)
225      vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
226   vassert(used == *size);
227   arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
228   for (k = 0; k < *size; k++)
229      arr2[k] = (*info)[k];
230   *size *= 2;
231   *info = arr2;
232}
233
234
235/* Sort an array of RRegLR entries by either the .live_after or
236   .dead_before fields.  This is performance-critical. */
237static void sortRRLRarray ( RRegLR* arr,
238                            Int size, Bool by_live_after )
239{
240   Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
241                       9841, 29524, 88573, 265720,
242                       797161, 2391484 };
243   Int    lo = 0;
244   Int    hi = size-1;
245   Int    i, j, h, bigN, hp;
246   RRegLR v;
247
248   vassert(size >= 0);
249   if (size == 0)
250      return;
251
252   bigN = hi - lo + 1; if (bigN < 2) return;
253   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
254
255   if (by_live_after) {
256
257      for ( ; hp >= 0; hp--) {
258         h = incs[hp];
259         for (i = lo + h; i <= hi; i++) {
260            v = arr[i];
261            j = i;
262            while (arr[j-h].live_after > v.live_after) {
263               arr[j] = arr[j-h];
264               j = j - h;
265               if (j <= (lo + h - 1)) break;
266            }
267            arr[j] = v;
268         }
269      }
270
271   } else {
272
273      for ( ; hp >= 0; hp--) {
274         h = incs[hp];
275         for (i = lo + h; i <= hi; i++) {
276            v = arr[i];
277            j = i;
278            while (arr[j-h].dead_before > v.dead_before) {
279               arr[j] = arr[j-h];
280               j = j - h;
281               if (j <= (lo + h - 1)) break;
282            }
283            arr[j] = v;
284         }
285      }
286
287   }
288}
289
290
291/* A target-independent register allocator.  Requires various
292   functions which it uses to deal abstractly with instructions and
293   registers, since it cannot have any target-specific knowledge.
294
295   Returns a new list of instructions, which, as a result of the
296   behaviour of mapRegs, will be in-place modifications of the
297   original instructions.
298
299   Requires that the incoming code has been generated using
300   vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
301   that range is a checked run-time error.
302
303   Takes an expandable array of pointers to unallocated insns.
304   Returns an expandable array of pointers to allocated insns.
305*/
306HInstrArray* doRegisterAllocation (
307
308   /* Incoming virtual-registerised code. */
309   HInstrArray* instrs_in,
310
311   /* An array listing all the real registers the allocator may use,
312      in no particular order. */
313   HReg* available_real_regs,
314   Int   n_available_real_regs,
315
316   /* Return True iff the given insn is a reg-reg move, in which
317      case also return the src and dst regs. */
318   Bool (*isMove) ( HInstr*, HReg*, HReg* ),
319
320   /* Get info about register usage in this insn. */
321   void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
322
323   /* Apply a reg-reg mapping to an insn. */
324   void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
325
326   /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
327      real reg to a spill slot byte offset.  The two leading HInstr**
328      args are out parameters, through which the generated insns are
329      returned.  Also (optionally) a 'directReload' function, which
330      attempts to replace a given instruction by one which reads
331      directly from a specified spill slot.  May be NULL, in which
332      case the optimisation is not attempted. */
333   void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
334   void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
335   HInstr* (*directReload) ( HInstr*, HReg, Short ),
336   Int     guest_sizeB,
337
338   /* For debug printing only. */
339   void (*ppInstr) ( HInstr*, Bool ),
340   void (*ppReg) ( HReg ),
341
342   /* 32/64bit mode */
343   Bool mode64
344)
345{
346#  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
347
348   const Bool eq_spill_opt = True;
349
350   /* Iterators and temporaries. */
351   Int       ii, j, k, m, spillee, k_suboptimal;
352   HReg      rreg, vreg, vregS, vregD;
353   HRegUsage reg_usage;
354
355   /* Info on vregs and rregs.  Computed once and remains
356      unchanged. */
357   Int     n_vregs;
358   VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
359
360   /* We keep two copies of the real-reg live range info, one sorted
361      by .live_after and the other by .dead_before.  First the
362      unsorted info is created in the _la variant is copied into the
363      _db variant.  Once that's done both of them are sorted.
364      We also need two integer cursors which record the next
365      location in the two arrays to consider. */
366   RRegLR* rreg_lrs_la;
367   RRegLR* rreg_lrs_db;
368   Int     rreg_lrs_size;
369   Int     rreg_lrs_used;
370   Int     rreg_lrs_la_next;
371   Int     rreg_lrs_db_next;
372
373   /* Used when constructing vreg_lrs (for allocating stack
374      slots). */
375   Int ss_busy_until_before[N_SPILL64S];
376
377   /* Used when constructing rreg_lrs. */
378   Int* rreg_live_after;
379   Int* rreg_dead_before;
380
381   /* Running state of the core allocation algorithm. */
382   RRegState* rreg_state;  /* [0 .. n_rregs-1] */
383   Int        n_rregs;
384
385   /* .. and the redundant backward map */
386   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
387      This inplies n_rregs must be <= 32768. */
388   Short*     vreg_state;  /* [0 .. n_vregs-1] */
389
390   /* The vreg -> rreg map constructed and then applied to each
391      instr. */
392   HRegRemap remap;
393
394   /* The output array of instructions. */
395   HInstrArray* instrs_out;
396
397   /* Sanity checks are expensive.  They are only done periodically,
398      not at each insn processed. */
399   Bool do_sanity_check;
400
401   vassert(0 == (guest_sizeB % 16));
402   vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
403   vassert(0 == (N_SPILL64S % 2));
404
405   /* The live range numbers are signed shorts, and so limiting the
406      number of insns to 10000 comfortably guards against them
407      overflowing 32k. */
408   vassert(instrs_in->arr_used <= 10000);
409
410#  define INVALID_INSTRNO (-2)
411
412#  define EMIT_INSTR(_instr)                  \
413      do {                                    \
414        HInstr* _tmp = (_instr);              \
415        if (DEBUG_REGALLOC) {                 \
416           vex_printf("**  ");                \
417           (*ppInstr)(_tmp, mode64);          \
418           vex_printf("\n\n");                \
419        }                                     \
420        addHInstr ( instrs_out, _tmp );       \
421      } while (0)
422
423#   define PRINT_STATE						   \
424      do {							   \
425         Int z, q;						   \
426         for (z = 0; z < n_rregs; z++) {			   \
427            vex_printf("  rreg_state[%2d] = ", z);		   \
428            (*ppReg)(rreg_state[z].rreg);	       		   \
429            vex_printf("  \t");					   \
430            switch (rreg_state[z].disp) {			   \
431               case Free:    vex_printf("Free\n"); break;	   \
432               case Unavail: vex_printf("Unavail\n"); break;	   \
433               case Bound:   vex_printf("BoundTo "); 		   \
434                             (*ppReg)(rreg_state[z].vreg);	   \
435                             vex_printf("\n"); break;		   \
436            }							   \
437         }							   \
438         vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
439         q = 0;                                                    \
440         for (z = 0; z < n_vregs; z++) {                           \
441            if (vreg_state[z] == INVALID_RREG_NO)                  \
442               continue;                                           \
443            vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
444            q++;                                                   \
445            if (q > 0 && (q % 6) == 0)                             \
446               vex_printf("\n    ");                               \
447         }                                                         \
448         vex_printf("\n");                                         \
449      } while (0)
450
451
452   /* --------- Stage 0: set up output array --------- */
453   /* --------- and allocate/initialise running state. --------- */
454
455   instrs_out = newHInstrArray();
456
457   /* ... and initialise running state. */
458   /* n_rregs is no more than a short name for n_available_real_regs. */
459   n_rregs = n_available_real_regs;
460   n_vregs = instrs_in->n_vregs;
461
462   /* If this is not so, vreg_state entries will overflow. */
463   vassert(n_vregs < 32767);
464
465   rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
466   vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
467
468   for (j = 0; j < n_rregs; j++) {
469      rreg_state[j].rreg          = available_real_regs[j];
470      rreg_state[j].has_hlrs      = False;
471      rreg_state[j].disp          = Free;
472      rreg_state[j].vreg          = INVALID_HREG;
473      rreg_state[j].is_spill_cand = False;
474      rreg_state[j].eq_spill_slot = False;
475   }
476
477   for (j = 0; j < n_vregs; j++)
478      vreg_state[j] = INVALID_RREG_NO;
479
480
481   /* --------- Stage 1: compute vreg live ranges. --------- */
482   /* --------- Stage 2: compute rreg live ranges. --------- */
483
484   /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
485
486   /* This is relatively simple, because (1) we only seek the complete
487      end-to-end live range of each vreg, and are not interested in
488      any holes in it, and (2) the vregs are conveniently numbered 0
489      .. n_vregs-1, so we can just dump the results in a
490      pre-allocated array. */
491
492   vreg_lrs = NULL;
493   if (n_vregs > 0)
494      vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
495
496   for (j = 0; j < n_vregs; j++) {
497      vreg_lrs[j].live_after     = INVALID_INSTRNO;
498      vreg_lrs[j].dead_before    = INVALID_INSTRNO;
499      vreg_lrs[j].spill_offset   = 0;
500      vreg_lrs[j].spill_size     = 0;
501      vreg_lrs[j].reg_class      = HRcINVALID;
502   }
503
504   /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
505
506   /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
507
508   /* This is more complex than Stage 1, because we need to compute
509      exactly all the live ranges of all the allocatable real regs,
510      and we don't know in advance how many there will be. */
511
512   rreg_lrs_used = 0;
513   rreg_lrs_size = 4;
514   rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
515   rreg_lrs_db = NULL; /* we'll create this later */
516
517   /* We'll need to track live range start/end points seperately for
518      each rreg.  Sigh. */
519   vassert(n_available_real_regs > 0);
520   rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
521   rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
522
523   for (j = 0; j < n_available_real_regs; j++) {
524      rreg_live_after[j] =
525      rreg_dead_before[j] = INVALID_INSTRNO;
526   }
527
528   /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
529
530   /* ------ start of ITERATE OVER INSNS ------ */
531
532   for (ii = 0; ii < instrs_in->arr_used; ii++) {
533
534      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
535
536#     if 0
537      vex_printf("\n%d  stage1: ", ii);
538      (*ppInstr)(instrs_in->arr[ii], mode64);
539      vex_printf("\n");
540      ppHRegUsage(&reg_usage);
541#     endif
542
543      /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
544
545      /* for each reg mentioned in the insn ... */
546      for (j = 0; j < reg_usage.n_used; j++) {
547
548         vreg = reg_usage.hreg[j];
549         /* only interested in virtual registers right now. */
550         if (!hregIsVirtual(vreg))
551            continue;
552         k = hregNumber(vreg);
553         if (k < 0 || k >= n_vregs) {
554            vex_printf("\n");
555            (*ppInstr)(instrs_in->arr[ii], mode64);
556            vex_printf("\n");
557            vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
558            vpanic("doRegisterAllocation: out-of-range vreg");
559         }
560
561         /* Take the opportunity to note its regclass.  We'll need
562            that when allocating spill slots. */
563         if (vreg_lrs[k].reg_class == HRcINVALID) {
564            /* First mention of this vreg. */
565            vreg_lrs[k].reg_class = hregClass(vreg);
566         } else {
567            /* Seen it before, so check for consistency. */
568            vassert(vreg_lrs[k].reg_class == hregClass(vreg));
569         }
570
571         /* Now consider live ranges. */
572         switch (reg_usage.mode[j]) {
573            case HRmRead:
574               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
575                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
576                  vpanic("doRegisterAllocation: "
577                         "first event for vreg is Read");
578               }
579               vreg_lrs[k].dead_before = toShort(ii + 1);
580               break;
581            case HRmWrite:
582               if (vreg_lrs[k].live_after == INVALID_INSTRNO)
583                  vreg_lrs[k].live_after = toShort(ii);
584               vreg_lrs[k].dead_before = toShort(ii + 1);
585               break;
586            case HRmModify:
587               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
588                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
589                  vpanic("doRegisterAllocation: "
590                         "first event for vreg is Modify");
591               }
592               vreg_lrs[k].dead_before = toShort(ii + 1);
593               break;
594            default:
595               vpanic("doRegisterAllocation(1)");
596         } /* switch */
597
598      } /* iterate over registers */
599
600      /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
601
602      /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
603
604      /* for each reg mentioned in the insn ... */
605      for (j = 0; j < reg_usage.n_used; j++) {
606
607         /* Dummy initialisations of flush_la and flush_db to avoid
608            possible bogus uninit-var warnings from gcc. */
609         Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
610         Bool flush;
611
612         rreg = reg_usage.hreg[j];
613
614         /* only interested in real registers right now. */
615         if (hregIsVirtual(rreg))
616            continue;
617
618         /* Furthermore, we're not interested in this rreg unless it's
619            one of the allocatable ones.  For example, it could be a
620            stack pointer register, or some other register beyond our
621            control, in which case we should just ignore it. */
622         for (k = 0; k < n_available_real_regs; k++)
623            if (available_real_regs[k] == rreg)
624               break;
625         if (k == n_available_real_regs)
626            continue; /* not found -- ignore. */
627         flush = False;
628         switch (reg_usage.mode[j]) {
629            case HRmWrite:
630               flush_la = rreg_live_after[k];
631               flush_db = rreg_dead_before[k];
632               if (flush_la != INVALID_INSTRNO
633                   && flush_db != INVALID_INSTRNO)
634                  flush = True;
635               rreg_live_after[k]  = ii;
636               rreg_dead_before[k] = ii+1;
637               break;
638            case HRmRead:
639               if (rreg_live_after[k] == INVALID_INSTRNO) {
640                  vex_printf("\nOFFENDING RREG = ");
641                  (*ppReg)(available_real_regs[k]);
642                  vex_printf("\n");
643                  vex_printf("\nOFFENDING instr = ");
644                  (*ppInstr)(instrs_in->arr[ii], mode64);
645                  vex_printf("\n");
646                  vpanic("doRegisterAllocation: "
647                         "first event for rreg is Read");
648               }
649               rreg_dead_before[k] = ii+1;
650               break;
651            case HRmModify:
652               if (rreg_live_after[k] == INVALID_INSTRNO) {
653                  vex_printf("\nOFFENDING RREG = ");
654                  (*ppReg)(available_real_regs[k]);
655                  vex_printf("\n");
656                  vex_printf("\nOFFENDING instr = ");
657                  (*ppInstr)(instrs_in->arr[ii], mode64);
658                  vex_printf("\n");
659                  vpanic("doRegisterAllocation: "
660                         "first event for rreg is Modify");
661               }
662               rreg_dead_before[k] = ii+1;
663               break;
664            default:
665               vpanic("doRegisterAllocation(2)");
666         }
667
668         if (flush) {
669            vassert(flush_la != INVALID_INSTRNO);
670            vassert(flush_db != INVALID_INSTRNO);
671            ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
672            if (0)
673               vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
674            rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
675            rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
676            rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
677            rreg_lrs_used++;
678         }
679
680      } /* iterate over regs in the instr */
681
682      /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
683
684   } /* iterate over insns */
685
686   /* ------ end of ITERATE OVER INSNS ------ */
687
688   /* ------ start of FINALISE RREG LIVE RANGES ------ */
689
690   /* Now finish up any live ranges left over. */
691   for (j = 0; j < n_available_real_regs; j++) {
692
693#     if 0
694      vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
695                                             rreg_dead_before[j]);
696#     endif
697      vassert( (rreg_live_after[j] == INVALID_INSTRNO
698               && rreg_dead_before[j] == INVALID_INSTRNO)
699              ||
700              (rreg_live_after[j] != INVALID_INSTRNO
701               && rreg_dead_before[j] != INVALID_INSTRNO)
702            );
703
704      if (rreg_live_after[j] == INVALID_INSTRNO)
705         continue;
706
707      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
708      if (0)
709         vex_printf("FLUSH 2 (%d,%d)\n",
710                    rreg_live_after[j], rreg_dead_before[j]);
711      rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
712      rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
713      rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
714      rreg_lrs_used++;
715   }
716
717   /* Compute summary hints for choosing real regs.  If a real reg is
718      involved in a hard live range, record that fact in the fixed
719      part of the running rreg_state.  Later, when offered a choice between
720      rregs, it's better to choose one which is not marked as having
721      any HLRs, since ones with HLRs may need to be spilled around
722      their HLRs.  Correctness of final assignment is unaffected by
723      this mechanism -- it is only an optimisation. */
724
725   for (j = 0; j < rreg_lrs_used; j++) {
726      rreg = rreg_lrs_la[j].rreg;
727      vassert(!hregIsVirtual(rreg));
728      /* rreg is involved in a HLR.  Record this info in the array, if
729         there is space. */
730      for (k = 0; k < n_rregs; k++)
731         if (rreg_state[k].rreg == rreg)
732            break;
733      vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
734      rreg_state[k].has_hlrs = True;
735   }
736   if (0) {
737      for (j = 0; j < n_rregs; j++) {
738         if (!rreg_state[j].has_hlrs)
739            continue;
740         ppReg(rreg_state[j].rreg);
741         vex_printf(" hinted\n");
742      }
743   }
744
745   /* Finally, copy the _la variant into the _db variant and
746      sort both by their respective fields. */
747   rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
748   for (j = 0; j < rreg_lrs_used; j++)
749      rreg_lrs_db[j] = rreg_lrs_la[j];
750
751   sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
752   sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
753
754   /* And set up the cursors. */
755   rreg_lrs_la_next = 0;
756   rreg_lrs_db_next = 0;
757
758   for (j = 1; j < rreg_lrs_used; j++) {
759      vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
760      vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
761   }
762
763   /* ------ end of FINALISE RREG LIVE RANGES ------ */
764
765#  if DEBUG_REGALLOC
766   for (j = 0; j < n_vregs; j++) {
767      vex_printf("vreg %d:  la = %d,  db = %d\n",
768                 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
769   }
770#  endif
771
772#  if DEBUG_REGALLOC
773   vex_printf("RRegLRs by LA:\n");
774   for (j = 0; j < rreg_lrs_used; j++) {
775      vex_printf("  ");
776      (*ppReg)(rreg_lrs_la[j].rreg);
777      vex_printf("      la = %d,  db = %d\n",
778                 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
779   }
780   vex_printf("RRegLRs by DB:\n");
781   for (j = 0; j < rreg_lrs_used; j++) {
782      vex_printf("  ");
783      (*ppReg)(rreg_lrs_db[j].rreg);
784      vex_printf("      la = %d,  db = %d\n",
785                 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
786   }
787#  endif
788
789   /* --------- Stage 3: allocate spill slots. --------- */
790
791   /* Each spill slot is 8 bytes long.  For vregs which take more than
792      64 bits to spill (classes Flt64 and Vec128), we have to allocate
793      two spill slots.
794
795      For Vec128-class on PowerPC, the spill slot's actual address
796      must be 16-byte aligned.  Since the spill slot's address is
797      computed as an offset from the guest state pointer, and since
798      the user of the generated code must set that pointer to a
799      16-aligned value, we have the residual obligation here of
800      choosing a 16-aligned spill slot offset for Vec128-class values.
801      Since each spill slot is 8 bytes long, that means for
802      Vec128-class values we must allocated a spill slot number which
803      is zero mod 2.
804
805      Do a rank-based allocation of vregs to spill slot numbers.  We
806      put as few values as possible in spill slots, but nevertheless
807      need to have a spill slot available for all vregs, just in case.
808   */
809   /* max_ss_no = -1; */
810
811   for (j = 0; j < N_SPILL64S; j++)
812      ss_busy_until_before[j] = 0;
813
814   for (j = 0; j < n_vregs; j++) {
815
816      /* True iff this vreg is unused.  In which case we also expect
817         that the reg_class field for it has not been set.  */
818      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
819         vassert(vreg_lrs[j].reg_class == HRcINVALID);
820         continue;
821      }
822
823      /* The spill slots are 64 bits in size.  As per the comment on
824         definition of HRegClass in host_generic_regs.h, that means, to
825         spill a vreg of class Flt64 or Vec128, we'll need to find two
826         adjacent spill slots to use.  Note, this logic needs to kept
827         in sync with the size info on the definition of HRegClass. */
828
829      if (vreg_lrs[j].reg_class == HRcVec128
830          || vreg_lrs[j].reg_class == HRcFlt64) {
831
832         /* Find two adjacent free slots in which between them provide
833            up to 128 bits in which to spill the vreg.  Since we are
834            trying to find an even:odd pair, move along in steps of 2
835            (slots). */
836
837         for (k = 0; k < N_SPILL64S-1; k += 2)
838            if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
839                && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
840               break;
841         if (k >= N_SPILL64S-1) {
842            vpanic("LibVEX_N_SPILL_BYTES is too low.  "
843                   "Increase and recompile.");
844         }
845         if (0) vex_printf("16-byte spill offset in spill slot %d\n", (Int)k);
846         ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
847         ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
848
849      } else {
850
851         /* The ordinary case -- just find a single spill slot. */
852
853         /* Find the lowest-numbered spill slot which is available at
854            the start point of this interval, and assign the interval
855            to it. */
856         for (k = 0; k < N_SPILL64S; k++)
857            if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
858               break;
859         if (k == N_SPILL64S) {
860            vpanic("LibVEX_N_SPILL_BYTES is too low.  "
861                   "Increase and recompile.");
862         }
863         ss_busy_until_before[k] = vreg_lrs[j].dead_before;
864
865      }
866
867      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
868         layout: the guest state, then two equal sized areas following
869         it for two sets of shadow state, and then the spill area. */
870      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
871
872      /* Independent check that we've made a sane choice of slot */
873      sanity_check_spill_offset( &vreg_lrs[j] );
874      /* if (j > max_ss_no) */
875      /*    max_ss_no = j; */
876   }
877
878#  if 0
879   vex_printf("\n\n");
880   for (j = 0; j < n_vregs; j++)
881      vex_printf("vreg %d    --> spill offset %d\n",
882                 j, vreg_lrs[j].spill_offset);
883#  endif
884
885   /* --------- Stage 4: establish rreg preferences --------- */
886
887   /* It may be advantageous to allocating certain vregs to specific
888      rregs, as a way of avoiding reg-reg moves later.  Here we
889      establish which, if any, rreg each vreg would prefer to be in.
890      Note that this constrains the allocator -- ideally we end up
891      with as few as possible vregs expressing a preference.
892
893      This is an optimisation: if the .preferred_rreg field is never
894      set to anything different from INVALID_HREG, the allocator still
895      works. */
896
897   /* 30 Dec 04: removed this mechanism as it does not seem to
898      help. */
899
900   /* --------- Stage 5: process instructions --------- */
901
902   /* This is the main loop of the allocator.  First, we need to
903      correctly set up our running state, which tracks the status of
904      each real register. */
905
906   /* ------ BEGIN: Process each insn in turn. ------ */
907
908   for (ii = 0; ii < instrs_in->arr_used; ii++) {
909
910#     if DEBUG_REGALLOC
911      vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
912      vex_printf("---- ");
913      (*ppInstr)(instrs_in->arr[ii], mode64);
914      vex_printf("\n\nInitial state:\n");
915      PRINT_STATE;
916      vex_printf("\n");
917#     endif
918
919      /* ------------ Sanity checks ------------ */
920
921      /* Sanity checks are expensive.  So they are done only once
922         every 7 instructions, and just before the last
923         instruction. */
924      do_sanity_check
925         = toBool(
926              False  /* Set to True for sanity checking of all insns. */
927              || ii == instrs_in->arr_used-1
928              || (ii > 0 && (ii % 7) == 0)
929           );
930
931      if (do_sanity_check) {
932
933         /* Sanity check 1: all rregs with a hard live range crossing
934            this insn must be marked as unavailable in the running
935            state. */
936         for (j = 0; j < rreg_lrs_used; j++) {
937            if (rreg_lrs_la[j].live_after < ii
938                && ii < rreg_lrs_la[j].dead_before) {
939               /* ii is the middle of a hard live range for some real
940                  reg.  Check it's marked as such in the running
941                  state. */
942
943#              if 0
944               vex_printf("considering la %d .. db %d   reg = ",
945                          rreg_lrs[j].live_after,
946                          rreg_lrs[j].dead_before);
947               (*ppReg)(rreg_lrs[j].rreg);
948               vex_printf("\n");
949#              endif
950
951               /* find the state entry for this rreg */
952               for (k = 0; k < n_rregs; k++)
953                  if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
954                     break;
955
956               /* and assert that this rreg is marked as unavailable */
957               vassert(rreg_state[k].disp == Unavail);
958            }
959         }
960
961         /* Sanity check 2: conversely, all rregs marked as
962            unavailable in the running rreg_state must have a
963            corresponding hard live range entry in the rreg_lrs
964            array. */
965         for (j = 0; j < n_available_real_regs; j++) {
966            vassert(rreg_state[j].disp == Bound
967                    || rreg_state[j].disp == Free
968                    || rreg_state[j].disp == Unavail);
969            if (rreg_state[j].disp != Unavail)
970               continue;
971            for (k = 0; k < rreg_lrs_used; k++)
972               if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
973                   && rreg_lrs_la[k].live_after < ii
974                   && ii < rreg_lrs_la[k].dead_before)
975                  break;
976            /* If this vassertion fails, we couldn't find a
977               corresponding HLR. */
978            vassert(k < rreg_lrs_used);
979         }
980
981         /* Sanity check 3: all vreg-rreg bindings must bind registers
982            of the same class. */
983         for (j = 0; j < n_rregs; j++) {
984            if (rreg_state[j].disp != Bound) {
985               vassert(rreg_state[j].eq_spill_slot == False);
986               continue;
987            }
988            vassert(hregClass(rreg_state[j].rreg)
989                    == hregClass(rreg_state[j].vreg));
990            vassert( hregIsVirtual(rreg_state[j].vreg));
991            vassert(!hregIsVirtual(rreg_state[j].rreg));
992         }
993
994         /* Sanity check 4: the vreg_state and rreg_state
995            mutually-redundant mappings are consistent.  If
996            rreg_state[j].vreg points at some vreg_state entry then
997            that vreg_state entry should point back at
998            rreg_state[j]. */
999         for (j = 0; j < n_rregs; j++) {
1000            if (rreg_state[j].disp != Bound)
1001               continue;
1002            k = hregNumber(rreg_state[j].vreg);
1003            vassert(IS_VALID_VREGNO(k));
1004            vassert(vreg_state[k] == j);
1005         }
1006         for (j = 0; j < n_vregs; j++) {
1007            k = vreg_state[j];
1008            if (k == INVALID_RREG_NO)
1009               continue;
1010            vassert(IS_VALID_RREGNO(k));
1011            vassert(rreg_state[k].disp == Bound);
1012            vassert(hregNumber(rreg_state[k].vreg) == j);
1013         }
1014
1015      } /* if (do_sanity_check) */
1016
1017      /* ------------ end of Sanity checks ------------ */
1018
1019      /* Do various optimisations pertaining to register coalescing
1020         and preferencing:
1021            MOV  v <-> v   coalescing (done here).
1022            MOV  v <-> r   coalescing (not yet, if ever)
1023      */
1024      /* If doing a reg-reg move between two vregs, and the src's live
1025         range ends here and the dst's live range starts here, bind
1026         the dst to the src's rreg, and that's all. */
1027      if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1028         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1029         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1030         /* Check that *isMove is not telling us a bunch of lies ... */
1031         vassert(hregClass(vregS) == hregClass(vregD));
1032         k = hregNumber(vregS);
1033         m = hregNumber(vregD);
1034         vassert(IS_VALID_VREGNO(k));
1035         vassert(IS_VALID_VREGNO(m));
1036         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1037         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1038#        if DEBUG_REGALLOC
1039         vex_printf("COALESCE ");
1040         (*ppReg)(vregS);
1041         vex_printf(" -> ");
1042         (*ppReg)(vregD);
1043         vex_printf("\n\n");
1044#        endif
1045         /* Find the state entry for vregS. */
1046         for (m = 0; m < n_rregs; m++)
1047            if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
1048               break;
1049         if (m == n_rregs)
1050            /* We failed to find a binding for vregS, which means it's
1051               currently not in a register.  So we can't do the
1052               coalescing.  Give up. */
1053            goto cannot_coalesce;
1054
1055         /* Finally, we can do the coalescing.  It's trivial -- merely
1056            claim vregS's register for vregD. */
1057         rreg_state[m].vreg = vregD;
1058         vassert(IS_VALID_VREGNO(hregNumber(vregD)));
1059         vassert(IS_VALID_VREGNO(hregNumber(vregS)));
1060         vreg_state[hregNumber(vregD)] = toShort(m);
1061         vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
1062
1063         /* This rreg has become associated with a different vreg and
1064            hence with a different spill slot.  Play safe. */
1065         rreg_state[m].eq_spill_slot = False;
1066
1067         /* Move on to the next insn.  We skip the post-insn stuff for
1068            fixed registers, since this move should not interact with
1069            them in any way. */
1070         continue;
1071      }
1072     cannot_coalesce:
1073
1074      /* ------ Free up rregs bound to dead vregs ------ */
1075
1076      /* Look for vregs whose live range has just ended, and
1077	 mark the associated rreg as free. */
1078
1079      for (j = 0; j < n_rregs; j++) {
1080         if (rreg_state[j].disp != Bound)
1081            continue;
1082         vreg = hregNumber(rreg_state[j].vreg);
1083         vassert(IS_VALID_VREGNO(vreg));
1084         if (vreg_lrs[vreg].dead_before <= ii) {
1085            rreg_state[j].disp = Free;
1086            rreg_state[j].eq_spill_slot = False;
1087            m = hregNumber(rreg_state[j].vreg);
1088            vassert(IS_VALID_VREGNO(m));
1089            vreg_state[m] = INVALID_RREG_NO;
1090            if (DEBUG_REGALLOC) {
1091               vex_printf("free up ");
1092               (*ppReg)(rreg_state[j].rreg);
1093               vex_printf("\n");
1094            }
1095         }
1096      }
1097
1098      /* ------ Pre-instruction actions for fixed rreg uses ------ */
1099
1100      /* Now we have to deal with rregs which are about to be made
1101         live by this instruction -- in other words, are entering into
1102         one of their live ranges.  If any such rreg holds a vreg, we
1103         will have to free up the rreg.  The simplest solution which
1104         is correct is to spill the rreg.
1105
1106         Note we could do better:
1107         * Could move it into some other free rreg, if one is available
1108
1109         Do this efficiently, by incrementally stepping along an array
1110         of rreg HLRs that are known to be sorted by start point
1111         (their .live_after field).
1112      */
1113      while (True) {
1114         vassert(rreg_lrs_la_next >= 0);
1115         vassert(rreg_lrs_la_next <= rreg_lrs_used);
1116         if (rreg_lrs_la_next == rreg_lrs_used)
1117            break; /* no more real reg live ranges to consider */
1118         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1119            break; /* next live range does not yet start */
1120         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1121         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1122            Find the associated rreg_state entry. */
1123         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1124            Real register live ranges are guaranteed to be well-formed
1125            in that they start with a write to the register -- Stage 2
1126            rejects any code not satisfying this.  So the correct
1127            question to ask is whether
1128            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1129            whether the reg becomes live after this insn -- rather
1130            than before it. */
1131#        if DEBUG_REGALLOC
1132         vex_printf("need to free up rreg: ");
1133         (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1134         vex_printf("\n\n");
1135#        endif
1136         for (k = 0; k < n_rregs; k++)
1137            if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
1138               break;
1139         /* If this fails, we don't have an entry for this rreg.
1140            Which we should. */
1141         vassert(IS_VALID_RREGNO(k));
1142         m = hregNumber(rreg_state[k].vreg);
1143         if (rreg_state[k].disp == Bound) {
1144            /* Yes, there is an associated vreg.  Spill it if it's
1145               still live. */
1146            vassert(IS_VALID_VREGNO(m));
1147            vreg_state[m] = INVALID_RREG_NO;
1148            if (vreg_lrs[m].dead_before > ii) {
1149               vassert(vreg_lrs[m].reg_class != HRcINVALID);
1150               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1151                  HInstr* spill1 = NULL;
1152                  HInstr* spill2 = NULL;
1153                  (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
1154                               vreg_lrs[m].spill_offset, mode64 );
1155                  vassert(spill1 || spill2); /* can't both be NULL */
1156                  if (spill1)
1157                     EMIT_INSTR(spill1);
1158                  if (spill2)
1159                     EMIT_INSTR(spill2);
1160               }
1161               rreg_state[k].eq_spill_slot = True;
1162            }
1163         }
1164         rreg_state[k].disp = Unavail;
1165         rreg_state[k].vreg = INVALID_HREG;
1166         rreg_state[k].eq_spill_slot = False;
1167
1168         /* check for further rregs entering HLRs at this point */
1169         rreg_lrs_la_next++;
1170      }
1171
1172
1173#     if DEBUG_REGALLOC
1174      vex_printf("After pre-insn actions for fixed regs:\n");
1175      PRINT_STATE;
1176      vex_printf("\n");
1177#     endif
1178
1179
1180      /* ------ Deal with the current instruction. ------ */
1181
1182      /* Finally we can begin the processing of this instruction
1183         itself.  The aim is to free up enough rregs for this insn.
1184         This may generate spill stores since we may have to evict
1185         some vregs currently in rregs.  Also generates spill loads.
1186         We also build up the final vreg->rreg mapping to be applied
1187         to the insn. */
1188
1189      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1190
1191      initHRegRemap(&remap);
1192
1193      /* ------------ BEGIN directReload optimisation ----------- */
1194
1195      /* If the instruction reads exactly one vreg which is currently
1196         in a spill slot, and this is last use of that vreg, see if we
1197         can convert the instruction into one reads directly from the
1198         spill slot.  This is clearly only possible for x86 and amd64
1199         targets, since ppc and arm load-store architectures.  If
1200         successful, replace instrs_in->arr[ii] with this new
1201         instruction, and recompute its reg usage, so that the change
1202         is invisible to the standard-case handling that follows. */
1203
1204      if (directReload && reg_usage.n_used <= 2) {
1205         Bool  debug_direct_reload = True && False;
1206         HReg  cand     = INVALID_HREG;
1207         Bool  nreads   = 0;
1208         Short spilloff = 0;
1209
1210         for (j = 0; j < reg_usage.n_used; j++) {
1211
1212            vreg = reg_usage.hreg[j];
1213
1214            if (!hregIsVirtual(vreg))
1215               continue;
1216
1217            if (reg_usage.mode[j] == HRmRead) {
1218               nreads++;
1219               m = hregNumber(vreg);
1220               vassert(IS_VALID_VREGNO(m));
1221               k = vreg_state[m];
1222               if (!IS_VALID_RREGNO(k)) {
1223                  /* ok, it is spilled.  Now, is this its last use? */
1224                  vassert(vreg_lrs[m].dead_before >= ii+1);
1225                  if (vreg_lrs[m].dead_before == ii+1
1226                      && cand == INVALID_HREG) {
1227                     spilloff = vreg_lrs[m].spill_offset;
1228                     cand = vreg;
1229                  }
1230               }
1231            }
1232         }
1233
1234         if (nreads == 1 && cand != INVALID_HREG) {
1235            HInstr* reloaded;
1236            if (reg_usage.n_used == 2)
1237               vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
1238
1239            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1240            if (debug_direct_reload && !reloaded) {
1241               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1242               ppInstr(instrs_in->arr[ii], mode64);
1243            }
1244            if (reloaded) {
1245               /* Update info about the insn, so it looks as if it had
1246                  been in this form all along. */
1247               instrs_in->arr[ii] = reloaded;
1248               (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1249               if (debug_direct_reload && !reloaded) {
1250                  vex_printf("  -->  ");
1251                  ppInstr(reloaded, mode64);
1252               }
1253            }
1254
1255            if (debug_direct_reload && !reloaded)
1256               vex_printf("\n");
1257         }
1258
1259      }
1260
1261      /* ------------ END directReload optimisation ------------ */
1262
1263      /* for each reg mentioned in the insn ... */
1264      for (j = 0; j < reg_usage.n_used; j++) {
1265
1266         vreg = reg_usage.hreg[j];
1267
1268         /* only interested in virtual registers right now. */
1269         if (!hregIsVirtual(vreg))
1270            continue;
1271
1272#        if 0
1273         vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1274#        endif
1275
1276         /* Now we're trying to find a rreg for "vreg".  First of all,
1277            if it already has an rreg assigned, we don't need to do
1278            anything more.  Search the current state to find out. */
1279         m = hregNumber(vreg);
1280         vassert(IS_VALID_VREGNO(m));
1281         k = vreg_state[m];
1282         if (IS_VALID_RREGNO(k)) {
1283            vassert(rreg_state[k].disp == Bound);
1284            addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1285            /* If this rreg is written or modified, mark it as different
1286               from any spill slot value. */
1287            if (reg_usage.mode[j] != HRmRead)
1288               rreg_state[k].eq_spill_slot = False;
1289            continue;
1290         } else {
1291            vassert(k == INVALID_RREG_NO);
1292         }
1293
1294         /* No luck.  The next thing to do is see if there is a
1295            currently free rreg available, of the correct class.  If
1296            so, bag it.  NOTE, we could improve this by selecting an
1297            rreg for which the next live-range event is as far ahead
1298            as possible. */
1299         k_suboptimal = -1;
1300         for (k = 0; k < n_rregs; k++) {
1301            if (rreg_state[k].disp != Free
1302                || hregClass(rreg_state[k].rreg) != hregClass(vreg))
1303               continue;
1304            if (rreg_state[k].has_hlrs) {
1305               /* Well, at least we can use k_suboptimal if we really
1306                  have to.  Keep on looking for a better candidate. */
1307               k_suboptimal = k;
1308            } else {
1309               /* Found a preferable reg.  Use it. */
1310               k_suboptimal = -1;
1311               break;
1312            }
1313         }
1314         if (k_suboptimal >= 0)
1315            k = k_suboptimal;
1316
1317         if (k < n_rregs) {
1318            rreg_state[k].disp = Bound;
1319            rreg_state[k].vreg = vreg;
1320            m = hregNumber(vreg);
1321            vassert(IS_VALID_VREGNO(m));
1322            vreg_state[m] = toShort(k);
1323            addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1324            /* Generate a reload if needed.  This only creates needed
1325               reloads because the live range builder for vregs will
1326               guarantee that the first event for a vreg is a write.
1327               Hence, if this reference is not a write, it cannot be
1328               the first reference for this vreg, and so a reload is
1329               indeed needed. */
1330            if (reg_usage.mode[j] != HRmWrite) {
1331               vassert(vreg_lrs[m].reg_class != HRcINVALID);
1332               HInstr* reload1 = NULL;
1333               HInstr* reload2 = NULL;
1334               (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
1335                             vreg_lrs[m].spill_offset, mode64 );
1336               vassert(reload1 || reload2); /* can't both be NULL */
1337               if (reload1)
1338                  EMIT_INSTR(reload1);
1339               if (reload2)
1340                  EMIT_INSTR(reload2);
1341               /* This rreg is read or modified by the instruction.
1342                  If it's merely read we can claim it now equals the
1343                  spill slot, but not so if it is modified. */
1344               if (reg_usage.mode[j] == HRmRead) {
1345                  rreg_state[k].eq_spill_slot = True;
1346               } else {
1347                  vassert(reg_usage.mode[j] == HRmModify);
1348                  rreg_state[k].eq_spill_slot = False;
1349               }
1350            } else {
1351               rreg_state[k].eq_spill_slot = False;
1352            }
1353
1354            continue;
1355         }
1356
1357         /* Well, now we have no option but to spill a vreg.  It's
1358            important to make a good choice of vreg to spill, and of
1359            course we need to be careful not to spill a vreg which is
1360            needed by this insn. */
1361
1362         /* First, mark in the rreg_state, those rregs which are not spill
1363            candidates, due to holding a vreg mentioned by this
1364            instruction.  Or being of the wrong class. */
1365         for (k = 0; k < n_rregs; k++) {
1366            rreg_state[k].is_spill_cand = False;
1367            if (rreg_state[k].disp != Bound)
1368               continue;
1369            if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
1370               continue;
1371            rreg_state[k].is_spill_cand = True;
1372            for (m = 0; m < reg_usage.n_used; m++) {
1373               if (rreg_state[k].vreg == reg_usage.hreg[m]) {
1374                  rreg_state[k].is_spill_cand = False;
1375                  break;
1376               }
1377            }
1378         }
1379
1380         /* We can choose to spill any rreg satisfying
1381            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
1382            the next use of its associated vreg is as far ahead as
1383            possible, in the hope that this will minimise the number
1384            of consequent reloads required. */
1385         spillee
1386            = findMostDistantlyMentionedVReg (
1387                 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
1388
1389         if (spillee == -1) {
1390            /* Hmmmmm.  There don't appear to be any spill candidates.
1391               We're hosed. */
1392            vex_printf("reg_alloc: can't find a register in class: ");
1393            ppHRegClass(hregClass(vreg));
1394            vex_printf("\n");
1395            vpanic("reg_alloc: can't create a free register.");
1396         }
1397
1398         /* Right.  So we're going to spill rreg_state[spillee]. */
1399         vassert(IS_VALID_RREGNO(spillee));
1400         vassert(rreg_state[spillee].disp == Bound);
1401         /* check it's the right class */
1402         vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
1403         /* check we're not ejecting the vreg for which we are trying
1404            to free up a register. */
1405         vassert(rreg_state[spillee].vreg != vreg);
1406
1407         m = hregNumber(rreg_state[spillee].vreg);
1408         vassert(IS_VALID_VREGNO(m));
1409
1410         /* So here's the spill store.  Assert that we're spilling a
1411            live vreg. */
1412         vassert(vreg_lrs[m].dead_before > ii);
1413         vassert(vreg_lrs[m].reg_class != HRcINVALID);
1414         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1415            HInstr* spill1 = NULL;
1416            HInstr* spill2 = NULL;
1417            (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
1418                         vreg_lrs[m].spill_offset, mode64 );
1419            vassert(spill1 || spill2); /* can't both be NULL */
1420            if (spill1)
1421               EMIT_INSTR(spill1);
1422            if (spill2)
1423               EMIT_INSTR(spill2);
1424         }
1425
1426         /* Update the rreg_state to reflect the new assignment for this
1427            rreg. */
1428         rreg_state[spillee].vreg = vreg;
1429         vreg_state[m] = INVALID_RREG_NO;
1430
1431         rreg_state[spillee].eq_spill_slot = False; /* be safe */
1432
1433         m = hregNumber(vreg);
1434         vassert(IS_VALID_VREGNO(m));
1435         vreg_state[m] = toShort(spillee);
1436
1437         /* Now, if this vreg is being read or modified (as opposed to
1438            written), we have to generate a reload for it. */
1439         if (reg_usage.mode[j] != HRmWrite) {
1440            vassert(vreg_lrs[m].reg_class != HRcINVALID);
1441            HInstr* reload1 = NULL;
1442            HInstr* reload2 = NULL;
1443            (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
1444                          vreg_lrs[m].spill_offset, mode64 );
1445            vassert(reload1 || reload2); /* can't both be NULL */
1446            if (reload1)
1447               EMIT_INSTR(reload1);
1448            if (reload2)
1449               EMIT_INSTR(reload2);
1450            /* This rreg is read or modified by the instruction.
1451               If it's merely read we can claim it now equals the
1452               spill slot, but not so if it is modified. */
1453            if (reg_usage.mode[j] == HRmRead) {
1454               rreg_state[spillee].eq_spill_slot = True;
1455            } else {
1456               vassert(reg_usage.mode[j] == HRmModify);
1457               rreg_state[spillee].eq_spill_slot = False;
1458            }
1459         }
1460
1461         /* So after much twisting and turning, we have vreg mapped to
1462            rreg_state[spillee].rreg.  Note that in the map. */
1463         addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
1464
1465      } /* iterate over registers in this instruction. */
1466
1467      /* We've finished clowning around with registers in this instruction.
1468         Three results:
1469         - the running rreg_state[] has been updated
1470         - a suitable vreg->rreg mapping for this instruction has been
1471           constructed
1472         - spill and reload instructions may have been emitted.
1473
1474        The final step is to apply the mapping to the instruction,
1475        and emit that.
1476      */
1477
1478      /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1479      (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1480      EMIT_INSTR( instrs_in->arr[ii] );
1481
1482#     if DEBUG_REGALLOC
1483      vex_printf("After dealing with current insn:\n");
1484      PRINT_STATE;
1485      vex_printf("\n");
1486#     endif
1487
1488      /* ------ Post-instruction actions for fixed rreg uses ------ */
1489
1490      /* Now we need to check for rregs exiting fixed live ranges
1491         after this instruction, and if so mark them as free. */
1492      while (True) {
1493         vassert(rreg_lrs_db_next >= 0);
1494         vassert(rreg_lrs_db_next <= rreg_lrs_used);
1495         if (rreg_lrs_db_next == rreg_lrs_used)
1496            break; /* no more real reg live ranges to consider */
1497         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1498            break; /* next live range does not yet start */
1499         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1500         /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
1501            range.  Mark it as such in the main rreg_state array. */
1502         for (k = 0; k < n_rregs; k++)
1503            if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
1504               break;
1505         /* If this vassertion fails, we don't have an entry for
1506            this rreg.  Which we should. */
1507         vassert(k < n_rregs);
1508         vassert(rreg_state[k].disp == Unavail);
1509         rreg_state[k].disp = Free;
1510         rreg_state[k].vreg = INVALID_HREG;
1511         rreg_state[k].eq_spill_slot = False;
1512
1513         /* check for further rregs leaving HLRs at this point */
1514         rreg_lrs_db_next++;
1515      }
1516
1517#     if DEBUG_REGALLOC
1518      vex_printf("After post-insn actions for fixed regs:\n");
1519      PRINT_STATE;
1520      vex_printf("\n");
1521#     endif
1522
1523   } /* iterate over insns */
1524
1525   /* ------ END: Process each insn in turn. ------ */
1526
1527   /* free(rreg_state); */
1528   /* free(rreg_lrs); */
1529   /* if (vreg_lrs) free(vreg_lrs); */
1530
1531   /* Paranoia */
1532   for (j = 0; j < n_rregs; j++)
1533      vassert(rreg_state[j].rreg == available_real_regs[j]);
1534
1535   vassert(rreg_lrs_la_next == rreg_lrs_used);
1536   vassert(rreg_lrs_db_next == rreg_lrs_used);
1537
1538   return instrs_out;
1539
1540#  undef INVALID_INSTRNO
1541#  undef EMIT_INSTR
1542#  undef PRINT_STATE
1543}
1544
1545
1546
1547/*---------------------------------------------------------------*/
1548/*---                                       host_reg_alloc2.c ---*/
1549/*---------------------------------------------------------------*/
1550