1432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
2432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/
3752f90673ebbb6b2f55fc5e46606dea371313713sewardj/*--- begin                                 host_reg_alloc2.c ---*/
4432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/
5432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
6f8ed9d874a7b8651654591c68c6d431c758d787csewardj/*
7752f90673ebbb6b2f55fc5e46606dea371313713sewardj   This file is part of Valgrind, a dynamic binary instrumentation
8752f90673ebbb6b2f55fc5e46606dea371313713sewardj   framework.
9f8ed9d874a7b8651654591c68c6d431c758d787csewardj
10ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes   Copyright (C) 2004-2017 OpenWorks LLP
11752f90673ebbb6b2f55fc5e46606dea371313713sewardj      info@open-works.net
127bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj
13752f90673ebbb6b2f55fc5e46606dea371313713sewardj   This program is free software; you can redistribute it and/or
14752f90673ebbb6b2f55fc5e46606dea371313713sewardj   modify it under the terms of the GNU General Public License as
15752f90673ebbb6b2f55fc5e46606dea371313713sewardj   published by the Free Software Foundation; either version 2 of the
16752f90673ebbb6b2f55fc5e46606dea371313713sewardj   License, or (at your option) any later version.
177bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj
18752f90673ebbb6b2f55fc5e46606dea371313713sewardj   This program is distributed in the hope that it will be useful, but
19752f90673ebbb6b2f55fc5e46606dea371313713sewardj   WITHOUT ANY WARRANTY; without even the implied warranty of
20752f90673ebbb6b2f55fc5e46606dea371313713sewardj   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21752f90673ebbb6b2f55fc5e46606dea371313713sewardj   General Public License for more details.
22752f90673ebbb6b2f55fc5e46606dea371313713sewardj
23752f90673ebbb6b2f55fc5e46606dea371313713sewardj   You should have received a copy of the GNU General Public License
24752f90673ebbb6b2f55fc5e46606dea371313713sewardj   along with this program; if not, write to the Free Software
25752f90673ebbb6b2f55fc5e46606dea371313713sewardj   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
267bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj   02110-1301, USA.
277bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj
28752f90673ebbb6b2f55fc5e46606dea371313713sewardj   The GNU General Public License is contained in the file COPYING.
29f8ed9d874a7b8651654591c68c6d431c758d787csewardj
30f8ed9d874a7b8651654591c68c6d431c758d787csewardj   Neither the names of the U.S. Department of Energy nor the
31f8ed9d874a7b8651654591c68c6d431c758d787csewardj   University of California nor the names of its contributors may be
32f8ed9d874a7b8651654591c68c6d431c758d787csewardj   used to endorse or promote products derived from this software
33f8ed9d874a7b8651654591c68c6d431c758d787csewardj   without prior written permission.
34f8ed9d874a7b8651654591c68c6d431c758d787csewardj*/
35f8ed9d874a7b8651654591c68c6d431c758d787csewardj
36432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#include "libvex_basictypes.h"
37432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#include "libvex.h"
38432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
39cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "main_util.h"
40cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "host_generic_regs.h"
41432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
42432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Set to 1 for lots of debugging output. */
43432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#define DEBUG_REGALLOC 0
44432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
45432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
46432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* TODO 27 Oct 04:
47432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
48432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Better consistency checking from what isMove tells us.
49432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
50432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   We can possibly do V-V coalescing even when the src is spilled,
51432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   providing we can arrange for the dst to have the same spill slot.
52432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
53432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Note that state[].hreg is the same as the available real regs.
54432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
55432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Generally rationalise data structures.  */
56432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
57432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
58432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Records information on virtual register live ranges.  Computed once
59432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   and remains unchanged after that. */
60432b1c9af7e4421b67644811b94489b57f3cfae1sewardjtypedef
61432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   struct {
62432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Becomes live for the first time after this insn ... */
63432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Short live_after;
64432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Becomes dead for the last time before this insn ... */
65432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Short dead_before;
66432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* The "home" spill slot, if needed.  Never changes. */
67432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Short spill_offset;
68432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Short spill_size;
69432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* What kind of register this is. */
70432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      HRegClass reg_class;
71432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
72ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   VRegLR;
73432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
74432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
75432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Records information on real-register live ranges.  Computed once
76432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   and remains unchanged after that. */
77432b1c9af7e4421b67644811b94489b57f3cfae1sewardjtypedef
78432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   struct {
79432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      HReg rreg;
80432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Becomes live after this insn ... */
81432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Short live_after;
82432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Becomes dead before this insn ... */
83432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Short dead_before;
84432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
85ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   RRegLR;
86432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
87432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
88ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj/* An array of the following structs (rreg_state) comprises the
89ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   running state of the allocator.  It indicates what the current
90ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   disposition of each allocatable real register is.  The array gets
91a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   updated as the allocator processes instructions.  The identity of
92a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   the register is not recorded here, because the index of this
93a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   structure in doRegisterAllocation()'s |rreg_state| is the index
94a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   number of the register, and the register itself can be extracted
95a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   from the RRegUniverse supplied to doRegisterAllocation(). */
96432b1c9af7e4421b67644811b94489b57f3cfae1sewardjtypedef
97432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   struct {
98607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
99432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Is this involved in any HLRs?  (only an optimisation hint) */
100432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Bool has_hlrs;
101607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      /* ------ FIELDS WHICH DO CHANGE ------ */
102607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      /* 6 May 07: rearranged fields below so the whole struct fits
103607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         into 16 bytes on both x86 and amd64. */
104607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      /* Used when .disp == Bound and we are looking for vregs to
105607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         spill. */
106607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      Bool is_spill_cand;
107607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      /* Optimisation: used when .disp == Bound.  Indicates when the
108607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         rreg has the same value as the spill slot for the associated
109607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         vreg.  Is safely left at False, and becomes True after a
110607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         spill store or reload for this rreg. */
111607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      Bool eq_spill_slot;
112432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* What's it's current disposition? */
113432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      enum { Free,     /* available for use */
114432b1c9af7e4421b67644811b94489b57f3cfae1sewardj             Unavail,  /* in a real-reg live range */
115432b1c9af7e4421b67644811b94489b57f3cfae1sewardj             Bound     /* in use (holding value of some vreg) */
116432b1c9af7e4421b67644811b94489b57f3cfae1sewardj           }
117432b1c9af7e4421b67644811b94489b57f3cfae1sewardj           disp;
118607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      /* If .disp == Bound, what vreg is it bound to? */
119432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      HReg vreg;
120432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
121432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   RRegState;
122432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
123c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
124ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj/* The allocator also maintains a redundant array of indexes
125ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   (vreg_state) from vreg numbers back to entries in rreg_state.  It
126ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   is redundant because iff vreg_state[i] == j then
127ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
128ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   point at each other.  The purpose of this is to speed up activities
129ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   which involve looking for a particular vreg: there is no need to
130ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   scan the rreg_state looking for it, just index directly into
131ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   vreg_state.  The FAQ "does this vreg already have an associated
132ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   rreg" is the main beneficiary.
133ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
134ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   To indicate, in vreg_state[i], that a given vreg is not currently
135ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   associated with any rreg, that entry can be set to INVALID_RREG_NO.
136ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
137ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   Because the vreg_state entries are signed Shorts, the max number
138ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   of vregs that can be handed by regalloc is 32767.
139ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj*/
140ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
141ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj#define INVALID_RREG_NO ((Short)(-1))
142ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
143ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
144ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
145ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
146432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
147432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Search forward from some given point in the incoming instruction
148432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   sequence.  Point is to select a virtual register to spill, by
149432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   finding the vreg which is mentioned as far ahead as possible, in
150432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   the hope that this will minimise the number of consequent reloads.
151432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
152432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Only do the search for vregs which are Bound in the running state,
153432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   and for which the .is_spill_cand field is set.  This allows the
154432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   caller to arbitrarily restrict the set of spill candidates to be
155432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   considered.
156432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
157a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   To do this we don't actually need to see the incoming instruction
158a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   stream.  Rather, what we need us the HRegUsage records for the
159a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   incoming instruction stream.  Hence that is passed in.
160a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
161432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Returns an index into the state array indicating the (v,r) pair to
162432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   spill, or -1 if none was found.  */
163432b1c9af7e4421b67644811b94489b57f3cfae1sewardjstatic
164432b1c9af7e4421b67644811b94489b57f3cfae1sewardjInt findMostDistantlyMentionedVReg (
165a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   HRegUsage*   reg_usages_in,
166432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Int          search_from_instr,
167a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   Int          num_instrs,
168432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   RRegState*   state,
169a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   Int          n_state
170432b1c9af7e4421b67644811b94489b57f3cfae1sewardj)
171432b1c9af7e4421b67644811b94489b57f3cfae1sewardj{
172432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Int k, m;
173432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Int furthest_k = -1;
174432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Int furthest   = -1;
175432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   vassert(search_from_instr >= 0);
176432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   for (k = 0; k < n_state; k++) {
177432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      if (!state[k].is_spill_cand)
178432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         continue;
179432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      vassert(state[k].disp == Bound);
180a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (m = search_from_instr; m < num_instrs; m++) {
181a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (HRegUsage__contains(&reg_usages_in[m], state[k].vreg))
182432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            break;
183432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
184432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      if (m > furthest) {
185432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         furthest   = m;
186432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         furthest_k = k;
187432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
188432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
189432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   return furthest_k;
190432b1c9af7e4421b67644811b94489b57f3cfae1sewardj}
191432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
192432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
193478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj/* Check that this vreg has been assigned a sane spill offset. */
194a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjinline
195a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void sanity_check_spill_offset ( VRegLR* vreg )
196478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj{
197c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj   switch (vreg->reg_class) {
198c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      case HRcVec128: case HRcFlt64:
199c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
200c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      default:
201c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
202478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj   }
203478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj}
204478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj
205478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj
206ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj/* Double the size of the real-reg live-range array, if needed. */
207a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj__attribute__((noinline))
208a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
209432b1c9af7e4421b67644811b94489b57f3cfae1sewardj{
210ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   Int     k;
211ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   RRegLR* arr2;
212432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   if (0)
213432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
214432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   vassert(used == *size);
215d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian   arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
216432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   for (k = 0; k < *size; k++)
217432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      arr2[k] = (*info)[k];
218432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   *size *= 2;
219432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   *info = arr2;
220432b1c9af7e4421b67644811b94489b57f3cfae1sewardj}
221a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjinline
222a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
223a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj{
224a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   if (LIKELY(used < *size)) return;
225a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   ensureRRLRspace_SLOW(info, size, used);
226a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj}
227432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
228432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
229c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj/* Sort an array of RRegLR entries by either the .live_after or
230c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   .dead_before fields.  This is performance-critical. */
231c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardjstatic void sortRRLRarray ( RRegLR* arr,
232c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj                            Int size, Bool by_live_after )
233c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj{
234c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
235c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj                       9841, 29524, 88573, 265720,
236c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj                       797161, 2391484 };
237c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   Int    lo = 0;
238c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   Int    hi = size-1;
239c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   Int    i, j, h, bigN, hp;
240c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   RRegLR v;
241c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
242c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   vassert(size >= 0);
243c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   if (size == 0)
244c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      return;
245c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
246c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   bigN = hi - lo + 1; if (bigN < 2) return;
247c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
248c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
249c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   if (by_live_after) {
250c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
251c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      for ( ; hp >= 0; hp--) {
252c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         h = incs[hp];
253c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         for (i = lo + h; i <= hi; i++) {
254c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            v = arr[i];
255c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            j = i;
256c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            while (arr[j-h].live_after > v.live_after) {
257c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               arr[j] = arr[j-h];
258c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               j = j - h;
259c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               if (j <= (lo + h - 1)) break;
260c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            }
261c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            arr[j] = v;
262c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         }
263c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      }
264c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
265c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   } else {
266c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
267c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      for ( ; hp >= 0; hp--) {
268c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         h = incs[hp];
269c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         for (i = lo + h; i <= hi; i++) {
270c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            v = arr[i];
271c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            j = i;
272c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            while (arr[j-h].dead_before > v.dead_before) {
273c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               arr[j] = arr[j-h];
274c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               j = j - h;
275c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               if (j <= (lo + h - 1)) break;
276c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            }
277c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            arr[j] = v;
278c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         }
279c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      }
280c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
281c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   }
282c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj}
283c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
284c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
285a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj/* Compute the index of the highest and lowest 1 in a ULong,
286a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   respectively.  Results are undefined if the argument is zero.
287a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   Don't pass it zero :) */
288a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic inline UInt ULong__maxIndex ( ULong w64 ) {
289a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   return 63 - __builtin_clzll(w64);
290a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj}
291a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
292a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic inline UInt ULong__minIndex ( ULong w64 ) {
293a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   return __builtin_ctzll(w64);
294a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj}
295a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
296a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
297a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
298a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void* local_memset ( void *destV, Int c, SizeT sz )
299a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj{
300a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj#  define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
301a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
302a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   UInt   c4;
303a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   UChar* d = destV;
304a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   UChar  uc = c;
305a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
306a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   while ((!IS_4_ALIGNED(d)) && sz >= 1) {
307a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      d[0] = uc;
308a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      d++;
309a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      sz--;
310a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   }
311a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   if (sz == 0)
312a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      return destV;
313a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   c4 = uc;
314a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   c4 |= (c4 << 8);
315a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   c4 |= (c4 << 16);
316a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   while (sz >= 16) {
317a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      ((UInt*)d)[0] = c4;
318a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      ((UInt*)d)[1] = c4;
319a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      ((UInt*)d)[2] = c4;
320a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      ((UInt*)d)[3] = c4;
321a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      d += 16;
322a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      sz -= 16;
323a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   }
324a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   while (sz >= 4) {
325a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      ((UInt*)d)[0] = c4;
326a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      d += 4;
327a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      sz -= 4;
328a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   }
329a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   while (sz >= 1) {
330a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      d[0] = c;
331a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      d++;
332a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      sz--;
333a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   }
334a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   return destV;
335a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
336a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj#  undef IS_4_ALIGNED
337a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj}
338a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
339a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
34017bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj/* A target-independent register allocator.  Requires various
34117bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj   functions which it uses to deal abstractly with instructions and
34217bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj   registers, since it cannot have any target-specific knowledge.
343432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
344432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Returns a new list of instructions, which, as a result of the
345432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   behaviour of mapRegs, will be in-place modifications of the
346432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   original instructions.
347432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
348432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Requires that the incoming code has been generated using
349432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
350432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   that range is a checked run-time error.
351432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
352432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Takes an expandable array of pointers to unallocated insns.
353432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Returns an expandable array of pointers to allocated insns.
354432b1c9af7e4421b67644811b94489b57f3cfae1sewardj*/
355432b1c9af7e4421b67644811b94489b57f3cfae1sewardjHInstrArray* doRegisterAllocation (
356432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
357432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Incoming virtual-registerised code. */
358432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   HInstrArray* instrs_in,
359432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
360a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   /* The real-register universe to use.  This contains facts about
361a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      real registers, one of which is the set of registers available
362a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for allocation. */
363a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   const RRegUniverse* univ,
364432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
365432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Return True iff the given insn is a reg-reg move, in which
366432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      case also return the src and dst regs. */
367d8c64e082224b2e688abdef9219cc76fd82b373bflorian   Bool (*isMove) ( const HInstr*, HReg*, HReg* ),
368432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
369432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Get info about register usage in this insn. */
370d8c64e082224b2e688abdef9219cc76fd82b373bflorian   void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),
371432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
372432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Apply a reg-reg mapping to an insn. */
37392b643609c5fa432b11fc726c2706ae3f3296eb4cerion   void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
374432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
3756c299f3acab617581ea504e45fbb6cab24c2b29fsewardj   /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
3766c299f3acab617581ea504e45fbb6cab24c2b29fsewardj      real reg to a spill slot byte offset.  The two leading HInstr**
3776c299f3acab617581ea504e45fbb6cab24c2b29fsewardj      args are out parameters, through which the generated insns are
3786c299f3acab617581ea504e45fbb6cab24c2b29fsewardj      returned.  Also (optionally) a 'directReload' function, which
379fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj      attempts to replace a given instruction by one which reads
380fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj      directly from a specified spill slot.  May be NULL, in which
381fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj      case the optimisation is not attempted. */
3826c299f3acab617581ea504e45fbb6cab24c2b29fsewardj   void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
3836c299f3acab617581ea504e45fbb6cab24c2b29fsewardj   void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
384fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj   HInstr* (*directReload) ( HInstr*, HReg, Short ),
385432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Int     guest_sizeB,
386432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
387432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* For debug printing only. */
388d8c64e082224b2e688abdef9219cc76fd82b373bflorian   void (*ppInstr) ( const HInstr*, Bool ),
38992b643609c5fa432b11fc726c2706ae3f3296eb4cerion   void (*ppReg) ( HReg ),
39092b643609c5fa432b11fc726c2706ae3f3296eb4cerion
39192b643609c5fa432b11fc726c2706ae3f3296eb4cerion   /* 32/64bit mode */
39292b643609c5fa432b11fc726c2706ae3f3296eb4cerion   Bool mode64
393432b1c9af7e4421b67644811b94489b57f3cfae1sewardj)
394432b1c9af7e4421b67644811b94489b57f3cfae1sewardj{
395432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
396432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
397607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj   const Bool eq_spill_opt = True;
398607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj
399432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Info on vregs and rregs.  Computed once and remains
400432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      unchanged. */
401ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   Int     n_vregs;
402ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
403432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
404c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   /* We keep two copies of the real-reg live range info, one sorted
405c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      by .live_after and the other by .dead_before.  First the
406c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      unsorted info is created in the _la variant is copied into the
407c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      _db variant.  Once that's done both of them are sorted.
408c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      We also need two integer cursors which record the next
409c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      location in the two arrays to consider. */
410c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   RRegLR* rreg_lrs_la;
411c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   RRegLR* rreg_lrs_db;
412ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   Int     rreg_lrs_size;
413ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   Int     rreg_lrs_used;
414c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   Int     rreg_lrs_la_next;
415c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   Int     rreg_lrs_db_next;
416ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj
417a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   /* Info on register usage in the incoming instruction array.
418a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      Computed once and remains unchanged, more or less; updated
419a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      sometimes by the direct-reload optimisation. */
420a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
421a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
422ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   /* Used when constructing vreg_lrs (for allocating stack
423432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      slots). */
424a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   Short ss_busy_until_before[N_SPILL64S];
425432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
426ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   /* Used when constructing rreg_lrs. */
427432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Int* rreg_live_after;
428432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   Int* rreg_dead_before;
429432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
430432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Running state of the core allocation algorithm. */
431ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   RRegState* rreg_state;  /* [0 .. n_rregs-1] */
432ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   Int        n_rregs;
433ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
434ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* .. and the redundant backward map */
435ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
436ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      This inplies n_rregs must be <= 32768. */
437ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   Short*     vreg_state;  /* [0 .. n_vregs-1] */
438432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
439432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* The vreg -> rreg map constructed and then applied to each
440432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      instr. */
441432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   HRegRemap remap;
442432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
443432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* The output array of instructions. */
444432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   HInstrArray* instrs_out;
445432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
446ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* Sanity checks are expensive.  They are only done periodically,
447ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      not at each insn processed. */
448ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   Bool do_sanity_check;
449ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
4505074b493ae4af5e6017fac208f823d283c6123c0florian   vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
4515074b493ae4af5e6017fac208f823d283c6123c0florian   vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
45295a487bc73c0f8c9371ad500988a51c9e78ee34aflorian   vassert(0 == (N_SPILL64S % 2));
453432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
454432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* The live range numbers are signed shorts, and so limiting the
4558c50d7dcedc1efc0768548c88b7ce9001b7d4964sewardj      number of insns to 15000 comfortably guards against them
456432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      overflowing 32k. */
4578c50d7dcedc1efc0768548c88b7ce9001b7d4964sewardj   vassert(instrs_in->arr_used <= 15000);
458432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
459432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#  define INVALID_INSTRNO (-2)
460432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
461432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#  define EMIT_INSTR(_instr)                  \
462432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      do {                                    \
463432b1c9af7e4421b67644811b94489b57f3cfae1sewardj        HInstr* _tmp = (_instr);              \
464432b1c9af7e4421b67644811b94489b57f3cfae1sewardj        if (DEBUG_REGALLOC) {                 \
465432b1c9af7e4421b67644811b94489b57f3cfae1sewardj           vex_printf("**  ");                \
46692b643609c5fa432b11fc726c2706ae3f3296eb4cerion           (*ppInstr)(_tmp, mode64);          \
467432b1c9af7e4421b67644811b94489b57f3cfae1sewardj           vex_printf("\n\n");                \
468432b1c9af7e4421b67644811b94489b57f3cfae1sewardj        }                                     \
469432b1c9af7e4421b67644811b94489b57f3cfae1sewardj        addHInstr ( instrs_out, _tmp );       \
470432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      } while (0)
471432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
472ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj#   define PRINT_STATE						   \
473ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      do {							   \
474ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         Int z, q;						   \
475ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         for (z = 0; z < n_rregs; z++) {			   \
476ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vex_printf("  rreg_state[%2d] = ", z);		   \
477a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            (*ppReg)(univ->regs[z]);	       			   \
478ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vex_printf("  \t");					   \
479ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            switch (rreg_state[z].disp) {			   \
480ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               case Free:    vex_printf("Free\n"); break;	   \
481ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               case Unavail: vex_printf("Unavail\n"); break;	   \
482ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               case Bound:   vex_printf("BoundTo "); 		   \
483ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                             (*ppReg)(rreg_state[z].vreg);	   \
484ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                             vex_printf("\n"); break;		   \
485ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            }							   \
486ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         }							   \
487ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
488ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         q = 0;                                                    \
489ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         for (z = 0; z < n_vregs; z++) {                           \
490ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (vreg_state[z] == INVALID_RREG_NO)                  \
491ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               continue;                                           \
492ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
493ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            q++;                                                   \
494ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (q > 0 && (q % 6) == 0)                             \
495ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               vex_printf("\n    ");                               \
496ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         }                                                         \
497ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vex_printf("\n");                                         \
498432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      } while (0)
499432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
500432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
501ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* --------- Stage 0: set up output array --------- */
502ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* --------- and allocate/initialise running state. --------- */
503ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
504432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   instrs_out = newHInstrArray();
505432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
506432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ... and initialise running state. */
507ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* n_rregs is no more than a short name for n_available_real_regs. */
508a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   n_rregs = univ->allocable;
509ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   n_vregs = instrs_in->n_vregs;
510ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
511ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* If this is not so, vreg_state entries will overflow. */
512ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   vassert(n_vregs < 32767);
513ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
514a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   /* If this is not so, the universe we have is nonsensical. */
515a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   vassert(n_rregs > 0);
516a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
517d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian   rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
518d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian   vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
519ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
520a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < n_rregs; j++) {
521ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      rreg_state[j].has_hlrs      = False;
522ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      rreg_state[j].disp          = Free;
523ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      rreg_state[j].vreg          = INVALID_HREG;
524ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      rreg_state[j].is_spill_cand = False;
525607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      rreg_state[j].eq_spill_slot = False;
526432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
527432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
528a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < n_vregs; j++)
529ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      vreg_state[j] = INVALID_RREG_NO;
530ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
531ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
532432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* --------- Stage 1: compute vreg live ranges. --------- */
533432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* --------- Stage 2: compute rreg live ranges. --------- */
534432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
535432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
536432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
537432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* This is relatively simple, because (1) we only seek the complete
538432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      end-to-end live range of each vreg, and are not interested in
539432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      any holes in it, and (2) the vregs are conveniently numbered 0
540ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      .. n_vregs-1, so we can just dump the results in a
541ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      pre-allocated array. */
542ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj
543ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   vreg_lrs = NULL;
544ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   if (n_vregs > 0)
545d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian      vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
546ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj
547a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < n_vregs; j++) {
548ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      vreg_lrs[j].live_after     = INVALID_INSTRNO;
549ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      vreg_lrs[j].dead_before    = INVALID_INSTRNO;
550ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      vreg_lrs[j].spill_offset   = 0;
551ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      vreg_lrs[j].spill_size     = 0;
552ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      vreg_lrs[j].reg_class      = HRcINVALID;
553432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
554432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
555a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   /* An array to hold the reg-usage info for the incoming
556a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      instructions. */
557a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   reg_usage_arr
558a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used-1);
559a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
560432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
561432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
562432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
563432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
564432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* This is more complex than Stage 1, because we need to compute
565432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      exactly all the live ranges of all the allocatable real regs,
566432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      and we don't know in advance how many there will be. */
567432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
568ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   rreg_lrs_used = 0;
569ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   rreg_lrs_size = 4;
570d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian   rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
571c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   rreg_lrs_db = NULL; /* we'll create this later */
572432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
573432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* We'll need to track live range start/end points seperately for
574432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      each rreg.  Sigh. */
575a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   vassert(n_rregs > 0);
576a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   rreg_live_after  = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
577a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
578432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
579a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < n_rregs; j++) {
580432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      rreg_live_after[j] =
581432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      rreg_dead_before[j] = INVALID_INSTRNO;
582432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
583432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
584432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
585432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
586432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ start of ITERATE OVER INSNS ------ */
587432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
588a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
589432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
590a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
591432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
592a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (0) {
593a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n%d  stage1: ", ii);
594a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         (*ppInstr)(instrs_in->arr[ii], mode64);
595a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n");
596a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         ppHRegUsage(univ, &reg_usage_arr[ii]);
597a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
598432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
599432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
600432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
601a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      /* for each virtual reg mentioned in the insn ... */
602a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
603432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
604a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         HReg vreg = reg_usage_arr[ii].vRegs[j];
605a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(hregIsVirtual(vreg));
606a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
607a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int k = hregIndex(vreg);
608ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         if (k < 0 || k >= n_vregs) {
609432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vex_printf("\n");
61092b643609c5fa432b11fc726c2706ae3f3296eb4cerion            (*ppInstr)(instrs_in->arr[ii], mode64);
611432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vex_printf("\n");
612ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
613432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vpanic("doRegisterAllocation: out-of-range vreg");
614432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
615432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
616432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Take the opportunity to note its regclass.  We'll need
617432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            that when allocating spill slots. */
618ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj         if (vreg_lrs[k].reg_class == HRcINVALID) {
619432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            /* First mention of this vreg. */
620ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj            vreg_lrs[k].reg_class = hregClass(vreg);
621432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         } else {
622432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            /* Seen it before, so check for consistency. */
623ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj            vassert(vreg_lrs[k].reg_class == hregClass(vreg));
624432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
625432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
626432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Now consider live ranges. */
627a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         switch (reg_usage_arr[ii].vMode[j]) {
628432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            case HRmRead:
629ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
630d08f2d71ec2efcdf5029b4b571c90452d12b1c72sewardj                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
631432b1c9af7e4421b67644811b94489b57f3cfae1sewardj                  vpanic("doRegisterAllocation: "
632432b1c9af7e4421b67644811b94489b57f3cfae1sewardj                         "first event for vreg is Read");
633d08f2d71ec2efcdf5029b4b571c90452d12b1c72sewardj               }
63440e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj               vreg_lrs[k].dead_before = toShort(ii + 1);
635432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               break;
636432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            case HRmWrite:
637ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj               if (vreg_lrs[k].live_after == INVALID_INSTRNO)
63840e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj                  vreg_lrs[k].live_after = toShort(ii);
63940e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj               vreg_lrs[k].dead_before = toShort(ii + 1);
640432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               break;
641432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            case HRmModify:
642ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
643109ffdbb31ff652ae83d0ad400966f68c46cd4f1sewardj                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
644432b1c9af7e4421b67644811b94489b57f3cfae1sewardj                  vpanic("doRegisterAllocation: "
645432b1c9af7e4421b67644811b94489b57f3cfae1sewardj                         "first event for vreg is Modify");
646109ffdbb31ff652ae83d0ad400966f68c46cd4f1sewardj               }
64740e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj               vreg_lrs[k].dead_before = toShort(ii + 1);
648432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               break;
649432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            default:
650432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               vpanic("doRegisterAllocation(1)");
651432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         } /* switch */
652432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
653a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      } /* iterate over virtual registers */
654432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
655432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
656432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
657432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
658432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
659a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      /* If this doesn't hold, the following iteration over real registers
660a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         will fail miserably. */
661a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      vassert(N_RREGUNIVERSE_REGS == 64);
662a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
663a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      const ULong rRead      = reg_usage_arr[ii].rRead;
664a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      const ULong rWritten   = reg_usage_arr[ii].rWritten;
665a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      const ULong rMentioned = rRead | rWritten;
666a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
667a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      UInt rReg_minIndex;
668a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      UInt rReg_maxIndex;
669a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (rMentioned == 0) {
670a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         /* There are no real register uses in this insn.  Set
671a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            rReg_{min,max}Index so that the following loop doesn't iterate
672a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            at all, so as to avoid wasting time. */
673a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         rReg_minIndex = 1;
674a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         rReg_maxIndex = 0;
675a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      } else {
676a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         rReg_minIndex = ULong__minIndex(rMentioned);
677a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         rReg_maxIndex = ULong__maxIndex(rMentioned);
678a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         /* Don't bother to look at registers which are not available
679a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            to the allocator.  We asserted above that n_rregs > 0, so
680a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            n_rregs-1 is safe. */
681a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (rReg_maxIndex >= n_rregs)
682a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            rReg_maxIndex = n_rregs-1;
683a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
684432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
685a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      /* for each allocator-available real reg mentioned in the insn ... */
686a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      /* Note.  We are allocating only over the real regs available to
687a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         the allocator.  Others, eg the stack or baseblock pointers,
688a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         are unavailable to allocation and so we never visit them.
689a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Hence the iteration is cut off at n_rregs-1, since n_rregs ==
690a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         univ->allocable. */
691a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
692432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
693a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         const ULong jMask = 1ULL << j;
694a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (LIKELY((rMentioned & jMask) == 0))
695432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            continue;
696432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
697a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         const Bool isR = (rRead    & jMask) != 0;
698a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         const Bool isW = (rWritten & jMask) != 0;
699a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
700a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         /* Dummy initialisations of flush_la and flush_db to avoid
701a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            possible bogus uninit-var warnings from gcc. */
702a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
703a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Bool flush = False;
704a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
705a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (isW && !isR) {
706a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            flush_la = rreg_live_after[j];
707a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            flush_db = rreg_dead_before[j];
708a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
709a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               flush = True;
710a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            rreg_live_after[j]  = ii;
711a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            rreg_dead_before[j] = ii+1;
712a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         } else if (!isW && isR) {
713a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (rreg_live_after[j] == INVALID_INSTRNO) {
714a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\nOFFENDING RREG = ");
715a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (*ppReg)(univ->regs[j]);
716a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\n");
717a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\nOFFENDING instr = ");
718a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (*ppInstr)(instrs_in->arr[ii], mode64);
719a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\n");
720a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vpanic("doRegisterAllocation: "
721a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                      "first event for rreg is Read");
722a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            }
723a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            rreg_dead_before[j] = ii+1;
724a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         } else {
725a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vassert(isR && isW);
726a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (rreg_live_after[j] == INVALID_INSTRNO) {
727a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\nOFFENDING RREG = ");
728a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (*ppReg)(univ->regs[j]);
729a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\n");
730a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\nOFFENDING instr = ");
731a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (*ppInstr)(instrs_in->arr[ii], mode64);
732a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vex_printf("\n");
733a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vpanic("doRegisterAllocation: "
734a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                      "first event for rreg is Modify");
735a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            }
736a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            rreg_dead_before[j] = ii+1;
737432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
738432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
739432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         if (flush) {
740432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vassert(flush_la != INVALID_INSTRNO);
741432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vassert(flush_db != INVALID_INSTRNO);
742c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
743432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            if (0)
744432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
745a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
746c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
747c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
748ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj            rreg_lrs_used++;
749432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
750432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
751a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      } /* iterate over rregs in the instr */
752432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
753432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
754432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
755432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   } /* iterate over insns */
756432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
757432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ end of ITERATE OVER INSNS ------ */
758432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
759432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ start of FINALISE RREG LIVE RANGES ------ */
760432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
761432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Now finish up any live ranges left over. */
762a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < n_rregs; j++) {
763432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
764a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (0) {
765a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
766a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                                                rreg_dead_before[j]);
767a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
768432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      vassert( (rreg_live_after[j] == INVALID_INSTRNO
769a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                && rreg_dead_before[j] == INVALID_INSTRNO)
770432b1c9af7e4421b67644811b94489b57f3cfae1sewardj              ||
771a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (rreg_live_after[j] != INVALID_INSTRNO
772a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                && rreg_dead_before[j] != INVALID_INSTRNO)
773432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            );
774432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
775432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      if (rreg_live_after[j] == INVALID_INSTRNO)
776432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         continue;
777432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
778c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
779432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      if (0)
780432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         vex_printf("FLUSH 2 (%d,%d)\n",
781432b1c9af7e4421b67644811b94489b57f3cfae1sewardj                    rreg_live_after[j], rreg_dead_before[j]);
782a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      rreg_lrs_la[rreg_lrs_used].rreg        = univ->regs[j];
783c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
784c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
785ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      rreg_lrs_used++;
786432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
787432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
788432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Compute summary hints for choosing real regs.  If a real reg is
789432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      involved in a hard live range, record that fact in the fixed
790ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      part of the running rreg_state.  Later, when offered a choice between
791432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      rregs, it's better to choose one which is not marked as having
792432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      any HLRs, since ones with HLRs may need to be spilled around
793432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      their HLRs.  Correctness of final assignment is unaffected by
794ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      this mechanism -- it is only an optimisation. */
795ee313664de48d0956d907b4ea7a954eb7f375683florian
796a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < rreg_lrs_used; j++) {
797a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      HReg rreg = rreg_lrs_la[j].rreg;
798432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      vassert(!hregIsVirtual(rreg));
799432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* rreg is involved in a HLR.  Record this info in the array, if
800432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         there is space. */
801a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      UInt ix = hregIndex(rreg);
802a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      vassert(ix < n_rregs);
803a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      rreg_state[ix].has_hlrs = True;
804432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
805432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   if (0) {
806a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < n_rregs; j++) {
807ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         if (!rreg_state[j].has_hlrs)
808432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            continue;
809a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         ppReg(univ->regs[j]);
810432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         vex_printf(" hinted\n");
811432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
812432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
813432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
814c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   /* Finally, copy the _la variant into the _db variant and
815c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      sort both by their respective fields. */
816d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian   rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
817a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < rreg_lrs_used; j++)
818c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      rreg_lrs_db[j] = rreg_lrs_la[j];
819c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
820c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
821c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
822c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
823c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   /* And set up the cursors. */
824c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   rreg_lrs_la_next = 0;
825c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   rreg_lrs_db_next = 0;
826c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
827a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 1; j < rreg_lrs_used; j++) {
828c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
829c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
830c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   }
831c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
832432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ end of FINALISE RREG LIVE RANGES ------ */
833432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
834a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   if (DEBUG_REGALLOC) {
835a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < n_vregs; j++) {
836a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("vreg %d:  la = %d,  db = %d\n",
837a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                    j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
838a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
839c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   }
840a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
841a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   if (DEBUG_REGALLOC) {
842a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      vex_printf("RRegLRs by LA:\n");
843a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < rreg_lrs_used; j++) {
844a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("  ");
845a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         (*ppReg)(rreg_lrs_la[j].rreg);
846a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("      la = %d,  db = %d\n",
847a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                    rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
848a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
849a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      vex_printf("RRegLRs by DB:\n");
850a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < rreg_lrs_used; j++) {
851a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("  ");
852a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         (*ppReg)(rreg_lrs_db[j].rreg);
853a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("      la = %d,  db = %d\n",
854a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                    rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
855a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
856432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
857432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
858432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* --------- Stage 3: allocate spill slots. --------- */
859432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
8607fb65ebcac4ee8522f9b6823d857d87b04146c31sewardj   /* Each spill slot is 8 bytes long.  For vregs which take more than
8617fb65ebcac4ee8522f9b6823d857d87b04146c31sewardj      64 bits to spill (classes Flt64 and Vec128), we have to allocate
862c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      two consecutive spill slots.  For 256 bit registers (class
863c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      Vec256), we have to allocate four consecutive spill slots.
864432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
865478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      For Vec128-class on PowerPC, the spill slot's actual address
866478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      must be 16-byte aligned.  Since the spill slot's address is
867478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      computed as an offset from the guest state pointer, and since
868478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      the user of the generated code must set that pointer to a
869c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      32-aligned value, we have the residual obligation here of
870478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      choosing a 16-aligned spill slot offset for Vec128-class values.
871478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      Since each spill slot is 8 bytes long, that means for
872478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      Vec128-class values we must allocated a spill slot number which
873478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      is zero mod 2.
874478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj
875a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      Similarly, for Vec256 class on amd64, find a spill slot number
876c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      which is zero mod 4.  This guarantees it will be 32 byte
877c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      aligned, which isn't actually necessary on amd64 (we use movUpd
878c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      etc to spill), but seems like good practice.
879c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj
880432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Do a rank-based allocation of vregs to spill slot numbers.  We
881607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj      put as few values as possible in spill slots, but nevertheless
882432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      need to have a spill slot available for all vregs, just in case.
883432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   */
884a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   /* Int max_ss_no = -1; */
885432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
886a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));
887432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
888a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int j = 0; j < n_vregs; j++) {
889432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
890432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* True iff this vreg is unused.  In which case we also expect
891432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         that the reg_class field for it has not been set.  */
892ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
893ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj         vassert(vreg_lrs[j].reg_class == HRcINVALID);
894432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         continue;
895432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
896432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
8977fb65ebcac4ee8522f9b6823d857d87b04146c31sewardj      /* The spill slots are 64 bits in size.  As per the comment on
898c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         definition of HRegClass in host_generic_regs.h, that means,
899c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         to spill a vreg of class Flt64 or Vec128, we'll need to find
900c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         two adjacent spill slots to use.  For Vec256, we'll need to
901c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         find four adjacent slots to use.  Note, this logic needs to
902c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         kept in sync with the size info on the definition of
903c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         HRegClass. */
904a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      Int ss_no = -1;
905c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj      switch (vreg_lrs[j].reg_class) {
906c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj
907c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         case HRcVec128: case HRcFlt64:
908c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj            /* Find two adjacent free slots in which between them
909c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj               provide up to 128 bits in which to spill the vreg.
910c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj               Since we are trying to find an even:odd pair, move
911c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj               along in steps of 2 (slots). */
912a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
913a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
914a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                   && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
915c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj                  break;
916a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (ss_no >= N_SPILL64S-1) {
917c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
918c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj                      "Increase and recompile.");
919c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj            }
920a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
921a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
922c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj            break;
923d08f2d71ec2efcdf5029b4b571c90452d12b1c72sewardj
924c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj         default:
925c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj            /* The ordinary case -- just find a single spill slot. */
926c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj            /* Find the lowest-numbered spill slot which is available
927c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj               at the start point of this interval, and assign the
928c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj               interval to it. */
929a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
930a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
931c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj                  break;
932a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (ss_no == N_SPILL64S) {
933c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
934c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj                      "Increase and recompile.");
935c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj            }
936a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
937c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj            break;
938432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
939a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      } /* switch (vreg_lrs[j].reg_class) */
940432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
941432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
942478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj         layout: the guest state, then two equal sized areas following
943478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj         it for two sets of shadow state, and then the spill area. */
944a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);
945432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
946478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      /* Independent check that we've made a sane choice of slot */
947478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj      sanity_check_spill_offset( &vreg_lrs[j] );
948432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* if (j > max_ss_no) */
949432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /*    max_ss_no = j; */
950432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   }
951432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
952a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   if (0) {
953a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      vex_printf("\n\n");
954a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < n_vregs; j++)
955a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("vreg %d    --> spill offset %d\n",
956a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                    j, vreg_lrs[j].spill_offset);
957a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   }
958432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
959432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* --------- Stage 4: establish rreg preferences --------- */
960432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
961432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* It may be advantageous to allocating certain vregs to specific
962432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      rregs, as a way of avoiding reg-reg moves later.  Here we
963432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      establish which, if any, rreg each vreg would prefer to be in.
964432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      Note that this constrains the allocator -- ideally we end up
9652fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj      with as few as possible vregs expressing a preference.
966432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
9672fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj      This is an optimisation: if the .preferred_rreg field is never
9682fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj      set to anything different from INVALID_HREG, the allocator still
9692fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj      works. */
97017bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj
97117bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj   /* 30 Dec 04: removed this mechanism as it does not seem to
97217bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj      help. */
973432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
974432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* --------- Stage 5: process instructions --------- */
975432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
976432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* This is the main loop of the allocator.  First, we need to
977432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      correctly set up our running state, which tracks the status of
978432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      each real register. */
979432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
980432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ BEGIN: Process each insn in turn. ------ */
981432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
982a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
983432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
984a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (DEBUG_REGALLOC) {
985a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
986a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("---- ");
987a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         (*ppInstr)(instrs_in->arr[ii], mode64);
988a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n\nInitial state:\n");
989a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         PRINT_STATE;
990a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n");
991a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
992432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
993432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------------ Sanity checks ------------ */
994432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
995ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      /* Sanity checks are expensive.  So they are done only once
996a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes         every 17 instructions, and just before the last
997ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         instruction. */
998ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      do_sanity_check
999ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         = toBool(
1000a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj              False /* Set to True for sanity checking of all insns. */
1001ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj              || ii == instrs_in->arr_used-1
1002a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes              || (ii > 0 && (ii % 17) == 0)
1003ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj           );
1004ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
1005ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      if (do_sanity_check) {
1006ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
1007ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         /* Sanity check 1: all rregs with a hard live range crossing
1008ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            this insn must be marked as unavailable in the running
1009ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            state. */
1010a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         for (Int j = 0; j < rreg_lrs_used; j++) {
1011a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (rreg_lrs_la[j].live_after < ii
1012c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj                && ii < rreg_lrs_la[j].dead_before) {
1013ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               /* ii is the middle of a hard live range for some real
1014ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                  reg.  Check it's marked as such in the running
1015ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                  state. */
1016a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               HReg reg = rreg_lrs_la[j].rreg;
1017ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
1018a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               if (0) {
1019a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                  vex_printf("considering la %d .. db %d   reg = ",
1020a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                             rreg_lrs_la[j].live_after,
1021a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                             rreg_lrs_la[j].dead_before);
1022a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                  (*ppReg)(reg);
1023a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                  vex_printf("\n");
1024a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               }
1025432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1026a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               /* assert that this rreg is marked as unavailable */
1027a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vassert(!hregIsVirtual(reg));
1028a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vassert(rreg_state[hregIndex(reg)].disp == Unavail);
1029ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            }
1030432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1031432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1032ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         /* Sanity check 2: conversely, all rregs marked as
1033ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            unavailable in the running rreg_state must have a
1034ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            corresponding hard live range entry in the rreg_lrs
1035ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            array. */
1036a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         for (Int j = 0; j < n_rregs; j++) {
1037ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert(rreg_state[j].disp == Bound
1038ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                    || rreg_state[j].disp == Free
1039ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                    || rreg_state[j].disp == Unavail);
1040ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (rreg_state[j].disp != Unavail)
1041ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               continue;
1042a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            Int k;
1043a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            for (k = 0; k < rreg_lrs_used; k++) {
1044a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               HReg reg = rreg_lrs_la[k].rreg;
1045a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vassert(!hregIsVirtual(reg));
1046a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               if (hregIndex(reg) == j
1047c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj                   && rreg_lrs_la[k].live_after < ii
1048c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj                   && ii < rreg_lrs_la[k].dead_before)
1049ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                  break;
1050a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            }
1051ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            /* If this vassertion fails, we couldn't find a
1052ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               corresponding HLR. */
1053ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert(k < rreg_lrs_used);
1054ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         }
1055432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1056ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         /* Sanity check 3: all vreg-rreg bindings must bind registers
1057ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            of the same class. */
1058a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         for (Int j = 0; j < n_rregs; j++) {
1059607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj            if (rreg_state[j].disp != Bound) {
1060607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               vassert(rreg_state[j].eq_spill_slot == False);
1061ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               continue;
1062607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj            }
1063a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vassert(hregClass(univ->regs[j])
1064ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                    == hregClass(rreg_state[j].vreg));
1065ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert( hregIsVirtual(rreg_state[j].vreg));
1066ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         }
1067432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1068ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         /* Sanity check 4: the vreg_state and rreg_state
1069ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            mutually-redundant mappings are consistent.  If
1070ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[j].vreg points at some vreg_state entry then
1071ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            that vreg_state entry should point back at
1072ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[j]. */
1073a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         for (Int j = 0; j < n_rregs; j++) {
1074ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (rreg_state[j].disp != Bound)
1075ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               continue;
1076a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            Int k = hregIndex(rreg_state[j].vreg);
1077ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert(IS_VALID_VREGNO(k));
1078ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert(vreg_state[k] == j);
1079ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         }
1080a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         for (Int j = 0; j < n_vregs; j++) {
1081a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            Int k = vreg_state[j];
1082ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (k == INVALID_RREG_NO)
1083ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj               continue;
1084ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert(IS_VALID_RREGNO(k));
1085ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert(rreg_state[k].disp == Bound);
1086a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vassert(hregIndex(rreg_state[k].vreg) == j);
1087ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         }
1088ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
1089ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj      } /* if (do_sanity_check) */
1090432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1091432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------------ end of Sanity checks ------------ */
1092432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1093432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Do various optimisations pertaining to register coalescing
1094432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         and preferencing:
1095432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            MOV  v <-> v   coalescing (done here).
1096432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            MOV  v <-> r   coalescing (not yet, if ever)
1097432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      */
1098432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* If doing a reg-reg move between two vregs, and the src's live
1099432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         range ends here and the dst's live range starts here, bind
1100432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         the dst to the src's rreg, and that's all. */
1101a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      HReg vregS = INVALID_HREG;
1102a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      HReg vregD = INVALID_HREG;
1103432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1104432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1105432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1106432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Check that *isMove is not telling us a bunch of lies ... */
1107432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         vassert(hregClass(vregS) == hregClass(vregD));
1108a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int k = hregIndex(vregS);
1109a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int m = hregIndex(vregD);
1110ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vassert(IS_VALID_VREGNO(k));
1111ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vassert(IS_VALID_VREGNO(m));
1112ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1113ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1114a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (DEBUG_REGALLOC) {
1115432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         vex_printf("COALESCE ");
1116a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            (*ppReg)(vregS);
1117a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vex_printf(" -> ");
1118a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            (*ppReg)(vregD);
1119a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vex_printf("\n\n");
1120a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         }
1121432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Find the state entry for vregS. */
1122a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int n = vreg_state[k]; /* k is the index of vregS */
1123a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (n == INVALID_RREG_NO) {
1124a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            /* vregS is not currently in a real register.  So we can't
1125a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               do the coalescing.  Give up. */
1126432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            goto cannot_coalesce;
1127a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         }
1128a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(IS_VALID_RREGNO(n));
1129432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1130432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Finally, we can do the coalescing.  It's trivial -- merely
1131432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            claim vregS's register for vregD. */
1132a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         rreg_state[n].vreg = vregD;
1133a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(IS_VALID_VREGNO(hregIndex(vregD)));
1134a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(IS_VALID_VREGNO(hregIndex(vregS)));
1135a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vreg_state[hregIndex(vregD)] = toShort(n);
1136a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
1137432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1138607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         /* This rreg has become associated with a different vreg and
1139607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj            hence with a different spill slot.  Play safe. */
1140a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         rreg_state[n].eq_spill_slot = False;
1141607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj
1142432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Move on to the next insn.  We skip the post-insn stuff for
1143432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            fixed registers, since this move should not interact with
1144432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            them in any way. */
1145432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         continue;
1146432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
1147432b1c9af7e4421b67644811b94489b57f3cfae1sewardj     cannot_coalesce:
1148432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1149432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ Free up rregs bound to dead vregs ------ */
1150432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1151432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Look for vregs whose live range has just ended, and
1152432b1c9af7e4421b67644811b94489b57f3cfae1sewardj	 mark the associated rreg as free. */
1153432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1154a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < n_rregs; j++) {
1155ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         if (rreg_state[j].disp != Bound)
1156432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            continue;
1157a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         UInt vregno = hregIndex(rreg_state[j].vreg);
1158c17ce029081f7ad437538cf474c51de80ab50964florian         vassert(IS_VALID_VREGNO(vregno));
1159c17ce029081f7ad437538cf474c51de80ab50964florian         if (vreg_lrs[vregno].dead_before <= ii) {
1160ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[j].disp = Free;
1161607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj            rreg_state[j].eq_spill_slot = False;
1162a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            Int m = hregIndex(rreg_state[j].vreg);
1163ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vassert(IS_VALID_VREGNO(m));
1164ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            vreg_state[m] = INVALID_RREG_NO;
1165432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            if (DEBUG_REGALLOC) {
1166432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               vex_printf("free up ");
1167a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (*ppReg)(univ->regs[j]);
1168432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               vex_printf("\n");
1169432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            }
1170432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1171432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
1172432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1173432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ Pre-instruction actions for fixed rreg uses ------ */
1174432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1175432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Now we have to deal with rregs which are about to be made
1176432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         live by this instruction -- in other words, are entering into
1177432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         one of their live ranges.  If any such rreg holds a vreg, we
1178432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         will have to free up the rreg.  The simplest solution which
1179432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         is correct is to spill the rreg.
1180432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1181432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         Note we could do better:
1182432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         * Could move it into some other free rreg, if one is available
1183432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1184c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         Do this efficiently, by incrementally stepping along an array
1185c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         of rreg HLRs that are known to be sorted by start point
1186c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         (their .live_after field).
1187432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      */
1188c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      while (True) {
1189c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(rreg_lrs_la_next >= 0);
1190c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(rreg_lrs_la_next <= rreg_lrs_used);
1191c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         if (rreg_lrs_la_next == rreg_lrs_used)
1192c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            break; /* no more real reg live ranges to consider */
1193c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1194c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            break; /* next live range does not yet start */
1195c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1196c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1197c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            Find the associated rreg_state entry. */
1198c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1199c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            Real register live ranges are guaranteed to be well-formed
1200c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            in that they start with a write to the register -- Stage 2
1201c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            rejects any code not satisfying this.  So the correct
1202c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            question to ask is whether
1203c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1204c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            whether the reg becomes live after this insn -- rather
1205c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            than before it. */
1206a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (DEBUG_REGALLOC) {
1207a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vex_printf("need to free up rreg: ");
1208a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1209a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vex_printf("\n\n");
1210a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         }
1211a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
1212a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj
1213c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         /* If this fails, we don't have an entry for this rreg.
1214c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            Which we should. */
1215c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(IS_VALID_RREGNO(k));
1216a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int m = hregIndex(rreg_state[k].vreg);
1217c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         if (rreg_state[k].disp == Bound) {
1218c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            /* Yes, there is an associated vreg.  Spill it if it's
1219c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               still live. */
1220c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            vassert(IS_VALID_VREGNO(m));
1221c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            vreg_state[m] = INVALID_RREG_NO;
1222c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            if (vreg_lrs[m].dead_before > ii) {
1223c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj               vassert(vreg_lrs[m].reg_class != HRcINVALID);
1224607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
12256c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                  HInstr* spill1 = NULL;
12266c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                  HInstr* spill2 = NULL;
1227a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                  (*genSpill)( &spill1, &spill2, univ->regs[k],
12286c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                               vreg_lrs[m].spill_offset, mode64 );
12296c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                  vassert(spill1 || spill2); /* can't both be NULL */
12306c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                  if (spill1)
12316c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                     EMIT_INSTR(spill1);
12326c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                  if (spill2)
12336c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                     EMIT_INSTR(spill2);
1234607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               }
1235607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               rreg_state[k].eq_spill_slot = True;
1236432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            }
1237432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1238c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         rreg_state[k].disp = Unavail;
1239c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         rreg_state[k].vreg = INVALID_HREG;
1240607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         rreg_state[k].eq_spill_slot = False;
1241c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
1242c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         /* check for further rregs entering HLRs at this point */
1243c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         rreg_lrs_la_next++;
1244432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
1245432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1246a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (DEBUG_REGALLOC) {
1247a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("After pre-insn actions for fixed regs:\n");
1248a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         PRINT_STATE;
1249a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n");
1250a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
1251432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1252432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ Deal with the current instruction. ------ */
1253432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1254432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Finally we can begin the processing of this instruction
1255432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         itself.  The aim is to free up enough rregs for this insn.
1256432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         This may generate spill stores since we may have to evict
1257432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         some vregs currently in rregs.  Also generates spill loads.
1258432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         We also build up the final vreg->rreg mapping to be applied
1259432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         to the insn. */
1260432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1261432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      initHRegRemap(&remap);
1262432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1263fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj      /* ------------ BEGIN directReload optimisation ----------- */
1264fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1265fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj      /* If the instruction reads exactly one vreg which is currently
1266fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj         in a spill slot, and this is last use of that vreg, see if we
1267a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         can convert the instruction into one that reads directly from
1268a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         the spill slot.  This is clearly only possible for x86 and
1269a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         amd64 targets, since ppc and arm are load-store
1270a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         architectures.  If successful, replace instrs_in->arr[ii]
1271a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         with this new instruction, and recompute its reg usage, so
1272a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         that the change is invisible to the standard-case handling
1273a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         that follows. */
1274fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1275a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
1276a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Bool  debug_direct_reload = False;
1277fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj         HReg  cand     = INVALID_HREG;
1278fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj         Bool  nreads   = 0;
1279fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj         Short spilloff = 0;
1280fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1281a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1282fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1283a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            HReg vreg = reg_usage_arr[ii].vRegs[j];
1284a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vassert(hregIsVirtual(vreg));
1285fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1286a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1287fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               nreads++;
1288a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               Int m = hregIndex(vreg);
1289fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               vassert(IS_VALID_VREGNO(m));
1290a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               Int k = vreg_state[m];
1291fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               if (!IS_VALID_RREGNO(k)) {
1292fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                  /* ok, it is spilled.  Now, is this its last use? */
1293fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                  vassert(vreg_lrs[m].dead_before >= ii+1);
1294fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                  if (vreg_lrs[m].dead_before == ii+1
129579efdc6ea93db174395af845d4e21a4a7ad600ccflorian                      && hregIsInvalid(cand)) {
1296fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                     spilloff = vreg_lrs[m].spill_offset;
1297fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                     cand = vreg;
1298fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                  }
1299fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               }
1300fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            }
1301fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj         }
1302fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
130379efdc6ea93db174395af845d4e21a4a7ad600ccflorian         if (nreads == 1 && ! hregIsInvalid(cand)) {
1304fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            HInstr* reloaded;
1305a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (reg_usage_arr[ii].n_vRegs == 2)
1306a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
1307a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                                  reg_usage_arr[ii].vRegs[1]));
1308fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1309fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1310fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            if (debug_direct_reload && !reloaded) {
1311fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1312fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               ppInstr(instrs_in->arr[ii], mode64);
1313fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            }
1314fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            if (reloaded) {
1315fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               /* Update info about the insn, so it looks as if it had
1316fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                  been in this form all along. */
1317fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               instrs_in->arr[ii] = reloaded;
1318a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
1319fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               if (debug_direct_reload && !reloaded) {
1320fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                  vex_printf("  -->  ");
1321fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj                  ppInstr(reloaded, mode64);
1322fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               }
1323fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            }
1324fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1325fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj            if (debug_direct_reload && !reloaded)
1326fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj               vex_printf("\n");
1327fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj         }
1328fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1329fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj      }
1330fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1331fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj      /* ------------ END directReload optimisation ------------ */
1332fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj
1333a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      /* for each virtual reg mentioned in the insn ... */
1334a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1335432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1336a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         HReg vreg = reg_usage_arr[ii].vRegs[j];
1337a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(hregIsVirtual(vreg));
1338432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1339a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (0) {
1340a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1341a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         }
1342432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1343432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Now we're trying to find a rreg for "vreg".  First of all,
1344432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            if it already has an rreg assigned, we don't need to do
1345a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            anything more.  Inspect the current state to find out. */
1346a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int m = hregIndex(vreg);
1347ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vassert(IS_VALID_VREGNO(m));
1348a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int n = vreg_state[m];
1349a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (IS_VALID_RREGNO(n)) {
1350a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vassert(rreg_state[n].disp == Bound);
1351a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            addToHRegRemap(&remap, vreg, univ->regs[n]);
1352607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj            /* If this rreg is written or modified, mark it as different
1353607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               from any spill slot value. */
1354a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (reg_usage_arr[ii].vMode[j] != HRmRead)
1355a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               rreg_state[n].eq_spill_slot = False;
1356432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            continue;
1357ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         } else {
1358a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vassert(n == INVALID_RREG_NO);
1359432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1360432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1361432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* No luck.  The next thing to do is see if there is a
1362432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            currently free rreg available, of the correct class.  If
1363432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            so, bag it.  NOTE, we could improve this by selecting an
1364432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            rreg for which the next live-range event is as far ahead
1365432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            as possible. */
1366a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int k_suboptimal = -1;
1367a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int k;
1368ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         for (k = 0; k < n_rregs; k++) {
1369ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (rreg_state[k].disp != Free
1370a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                || hregClass(univ->regs[k]) != hregClass(vreg))
1371432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               continue;
1372ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (rreg_state[k].has_hlrs) {
1373432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               /* Well, at least we can use k_suboptimal if we really
1374432b1c9af7e4421b67644811b94489b57f3cfae1sewardj                  have to.  Keep on looking for a better candidate. */
1375432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               k_suboptimal = k;
1376432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            } else {
1377432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               /* Found a preferable reg.  Use it. */
1378432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               k_suboptimal = -1;
1379432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               break;
1380432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            }
1381432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1382432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         if (k_suboptimal >= 0)
1383432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            k = k_suboptimal;
1384432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1385ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         if (k < n_rregs) {
1386ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[k].disp = Bound;
1387ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[k].vreg = vreg;
1388a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            Int p = hregIndex(vreg);
1389a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vassert(IS_VALID_VREGNO(p));
1390a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            vreg_state[p] = toShort(k);
1391a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            addToHRegRemap(&remap, vreg, univ->regs[k]);
1392607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj            /* Generate a reload if needed.  This only creates needed
1393607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               reloads because the live range builder for vregs will
1394607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               guarantee that the first event for a vreg is a write.
1395607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               Hence, if this reference is not a write, it cannot be
1396607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               the first reference for this vreg, and so a reload is
1397607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               indeed needed. */
1398a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1399a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vassert(vreg_lrs[p].reg_class != HRcINVALID);
14006c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               HInstr* reload1 = NULL;
14016c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               HInstr* reload2 = NULL;
1402a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               (*genReload)( &reload1, &reload2, univ->regs[k],
1403a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                             vreg_lrs[p].spill_offset, mode64 );
14046c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               vassert(reload1 || reload2); /* can't both be NULL */
14056c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               if (reload1)
14066c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                  EMIT_INSTR(reload1);
14076c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               if (reload2)
14086c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                  EMIT_INSTR(reload2);
14097342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj               /* This rreg is read or modified by the instruction.
14107342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj                  If it's merely read we can claim it now equals the
14117342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj                  spill slot, but not so if it is modified. */
1412a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               if (reg_usage_arr[ii].vMode[j] == HRmRead) {
14137342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj                  rreg_state[k].eq_spill_slot = True;
14147342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj               } else {
1415a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                  vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
14167342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj                  rreg_state[k].eq_spill_slot = False;
14177342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj               }
1418607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj            } else {
1419607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj               rreg_state[k].eq_spill_slot = False;
1420432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            }
1421607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj
1422432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            continue;
1423432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1424432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1425432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Well, now we have no option but to spill a vreg.  It's
1426432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            important to make a good choice of vreg to spill, and of
1427432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            course we need to be careful not to spill a vreg which is
1428432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            needed by this insn. */
1429432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1430ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         /* First, mark in the rreg_state, those rregs which are not spill
1431432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            candidates, due to holding a vreg mentioned by this
1432432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            instruction.  Or being of the wrong class. */
1433ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         for (k = 0; k < n_rregs; k++) {
1434ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[k].is_spill_cand = False;
1435ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            if (rreg_state[k].disp != Bound)
1436432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               continue;
1437a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (hregClass(univ->regs[k]) != hregClass(vreg))
1438432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               continue;
1439ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[k].is_spill_cand = True;
1440a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            /* Note, the following loop visits only the virtual regs
1441a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               mentioned by the instruction. */
1442a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
1443a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
1444ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj                  rreg_state[k].is_spill_cand = False;
1445432b1c9af7e4421b67644811b94489b57f3cfae1sewardj                  break;
1446432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               }
1447432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            }
1448432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1449432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1450432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* We can choose to spill any rreg satisfying
1451ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
1452432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            the next use of its associated vreg is as far ahead as
1453432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            possible, in the hope that this will minimise the number
1454432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            of consequent reloads required. */
1455a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int spillee
1456432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            = findMostDistantlyMentionedVReg (
1457a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj                 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs );
1458432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1459432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         if (spillee == -1) {
1460432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            /* Hmmmmm.  There don't appear to be any spill candidates.
1461432b1c9af7e4421b67644811b94489b57f3cfae1sewardj               We're hosed. */
1462432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vex_printf("reg_alloc: can't find a register in class: ");
1463432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            ppHRegClass(hregClass(vreg));
1464432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vex_printf("\n");
1465432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            vpanic("reg_alloc: can't create a free register.");
1466432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1467432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1468ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         /* Right.  So we're going to spill rreg_state[spillee]. */
1469ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vassert(IS_VALID_RREGNO(spillee));
1470ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vassert(rreg_state[spillee].disp == Bound);
1471432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* check it's the right class */
1472a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
1473432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* check we're not ejecting the vreg for which we are trying
1474432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            to free up a register. */
147579efdc6ea93db174395af845d4e21a4a7ad600ccflorian         vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
1476432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1477a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         m = hregIndex(rreg_state[spillee].vreg);
1478ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vassert(IS_VALID_VREGNO(m));
1479432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1480432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* So here's the spill store.  Assert that we're spilling a
1481432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            live vreg. */
1482ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj         vassert(vreg_lrs[m].dead_before > ii);
1483ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj         vassert(vreg_lrs[m].reg_class != HRcINVALID);
1484607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
14856c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            HInstr* spill1 = NULL;
14866c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            HInstr* spill2 = NULL;
1487a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            (*genSpill)( &spill1, &spill2, univ->regs[spillee],
14886c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                         vreg_lrs[m].spill_offset, mode64 );
14896c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            vassert(spill1 || spill2); /* can't both be NULL */
14906c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            if (spill1)
14916c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               EMIT_INSTR(spill1);
14926c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            if (spill2)
14936c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               EMIT_INSTR(spill2);
1494607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         }
1495432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1496ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         /* Update the rreg_state to reflect the new assignment for this
1497432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            rreg. */
1498ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         rreg_state[spillee].vreg = vreg;
1499ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vreg_state[m] = INVALID_RREG_NO;
1500ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj
1501607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         rreg_state[spillee].eq_spill_slot = False; /* be safe */
1502607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj
1503a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         m = hregIndex(vreg);
1504ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         vassert(IS_VALID_VREGNO(m));
1505d4d3dd6ab27066963d21fca93394d6d0f0674a19sewardj         vreg_state[m] = toShort(spillee);
1506432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1507432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* Now, if this vreg is being read or modified (as opposed to
1508432b1c9af7e4421b67644811b94489b57f3cfae1sewardj            written), we have to generate a reload for it. */
1509a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1510ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj            vassert(vreg_lrs[m].reg_class != HRcINVALID);
15116c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            HInstr* reload1 = NULL;
15126c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            HInstr* reload2 = NULL;
1513a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            (*genReload)( &reload1, &reload2, univ->regs[spillee],
15146c299f3acab617581ea504e45fbb6cab24c2b29fsewardj                          vreg_lrs[m].spill_offset, mode64 );
15156c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            vassert(reload1 || reload2); /* can't both be NULL */
15166c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            if (reload1)
15176c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               EMIT_INSTR(reload1);
15186c299f3acab617581ea504e45fbb6cab24c2b29fsewardj            if (reload2)
15196c299f3acab617581ea504e45fbb6cab24c2b29fsewardj               EMIT_INSTR(reload2);
15207342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj            /* This rreg is read or modified by the instruction.
15217342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj               If it's merely read we can claim it now equals the
15227342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj               spill slot, but not so if it is modified. */
1523a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
15247342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj               rreg_state[spillee].eq_spill_slot = True;
15257342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj            } else {
1526a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj               vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
15277342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj               rreg_state[spillee].eq_spill_slot = False;
15287342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj            }
1529432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         }
1530432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1531432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         /* So after much twisting and turning, we have vreg mapped to
15327342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj            rreg_state[spillee].rreg.  Note that in the map. */
1533a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         addToHRegRemap(&remap, vreg, univ->regs[spillee]);
1534432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1535a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      } /* iterate over virtual registers in this instruction. */
1536432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1537432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* We've finished clowning around with registers in this instruction.
1538432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         Three results:
1539ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj         - the running rreg_state[] has been updated
1540432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         - a suitable vreg->rreg mapping for this instruction has been
1541432b1c9af7e4421b67644811b94489b57f3cfae1sewardj           constructed
1542432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         - spill and reload instructions may have been emitted.
1543432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1544432b1c9af7e4421b67644811b94489b57f3cfae1sewardj        The final step is to apply the mapping to the instruction,
1545432b1c9af7e4421b67644811b94489b57f3cfae1sewardj        and emit that.
1546432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      */
1547432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1548432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
154992b643609c5fa432b11fc726c2706ae3f3296eb4cerion      (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1550432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      EMIT_INSTR( instrs_in->arr[ii] );
1551432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1552a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (DEBUG_REGALLOC) {
1553a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("After dealing with current insn:\n");
1554a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         PRINT_STATE;
1555a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n");
1556a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
1557432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1558432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* ------ Post-instruction actions for fixed rreg uses ------ */
1559432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1560432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      /* Now we need to check for rregs exiting fixed live ranges
1561432b1c9af7e4421b67644811b94489b57f3cfae1sewardj         after this instruction, and if so mark them as free. */
1562c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj      while (True) {
1563c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(rreg_lrs_db_next >= 0);
1564c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(rreg_lrs_db_next <= rreg_lrs_used);
1565c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         if (rreg_lrs_db_next == rreg_lrs_used)
1566c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            break; /* no more real reg live ranges to consider */
1567c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1568c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            break; /* next live range does not yet start */
1569c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1570c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
1571c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj            range.  Mark it as such in the main rreg_state array. */
1572a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
1573a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(!hregIsVirtual(reg));
1574a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         Int k = hregIndex(reg);
1575a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vassert(IS_VALID_RREGNO(k));
1576c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         vassert(rreg_state[k].disp == Unavail);
1577c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         rreg_state[k].disp = Free;
1578c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         rreg_state[k].vreg = INVALID_HREG;
1579607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj         rreg_state[k].eq_spill_slot = False;
1580c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
1581c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         /* check for further rregs leaving HLRs at this point */
1582c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj         rreg_lrs_db_next++;
1583432b1c9af7e4421b67644811b94489b57f3cfae1sewardj      }
1584432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1585a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      if (DEBUG_REGALLOC) {
1586a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("After post-insn actions for fixed regs:\n");
1587a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         PRINT_STATE;
1588a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj         vex_printf("\n");
1589a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj      }
1590432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1591432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   } /* iterate over insns */
1592432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1593432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* ------ END: Process each insn in turn. ------ */
1594432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1595ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj   /* free(rreg_state); */
1596ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   /* free(rreg_lrs); */
1597ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj   /* if (vreg_lrs) free(vreg_lrs); */
1598432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1599432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   /* Paranoia */
1600c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   vassert(rreg_lrs_la_next == rreg_lrs_used);
1601c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj   vassert(rreg_lrs_db_next == rreg_lrs_used);
1602c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj
1603432b1c9af7e4421b67644811b94489b57f3cfae1sewardj   return instrs_out;
1604432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1605432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#  undef INVALID_INSTRNO
1606432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#  undef EMIT_INSTR
1607432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#  undef PRINT_STATE
1608432b1c9af7e4421b67644811b94489b57f3cfae1sewardj}
1609432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1610432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1611432b1c9af7e4421b67644811b94489b57f3cfae1sewardj
1612432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/
1613cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj/*---                                       host_reg_alloc2.c ---*/
1614432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/
1615