1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/*
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright 2011 Christoph Bumiller
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"),
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions:
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice shall be included in
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * all copies or substantial portions of the Software.
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * SOFTWARE.
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nv50_ir.h"
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nv50_ir_target.h"
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <stack>
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <limits>
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace nv50_ir {
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define MAX_REGISTER_FILE_SIZE 256
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass RegisterSet
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RegisterSet(const Target *);
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void init(const Target *);
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void reset(DataFile, bool resetMax = false);
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void intersect(DataFile f, const RegisterSet *);
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool assign(int32_t& reg, DataFile f, unsigned int size);
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void release(DataFile f, int32_t reg, unsigned int size);
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool occupy(DataFile f, int32_t reg, unsigned int size);
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool occupy(const Value *);
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void occupyMask(DataFile f, int32_t reg, uint8_t mask);
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline int getMaxAssigned(DataFile f) const { return fill[f]; }
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline unsigned int getFileSize(DataFile f, uint8_t regSize) const
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (restrictedGPR16Range && f == FILE_GPR && regSize == 2)
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return (last[f] + 1) / 2;
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return last[f] + 1;
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline unsigned int units(DataFile f, unsigned int size) const
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return size >> unit[f];
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline unsigned int idToBytes(Value *v) const
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return v->reg.data.id * MIN2(v->reg.size, 4);
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline unsigned int idToUnits(Value *v) const
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return units(v->reg.file, idToBytes(v));
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline int bytesToId(Value *v, unsigned int bytes) const
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (v->reg.size < 4)
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return units(v->reg.file, bytes);
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return bytes / 4;
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline int unitsToId(DataFile f, int u, uint8_t size) const
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (u < 0)
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return -1;
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return (size < 4) ? u : ((u << unit[f]) / 4);
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void print() const;
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   BitSet bits[LAST_REGISTER_FILE + 1];
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int last[LAST_REGISTER_FILE + 1];
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int fill[LAST_REGISTER_FILE + 1];
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const bool restrictedGPR16Range;
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::reset(DataFile f, bool resetMax)
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bits[f].fill(0);
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (resetMax)
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fill[f] = -1;
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::init(const Target *targ)
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      DataFile f = static_cast<DataFile>(rf);
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      last[rf] = targ->getFileSize(f) - 1;
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unit[rf] = targ->getFileUnit(f);
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fill[rf] = -1;
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(last[rf] < MAX_REGISTER_FILE_SIZE);
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bits[rf].allocate(last[rf] + 1, true);
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::RegisterSet(const Target *targ)
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  : restrictedGPR16Range(targ->getChipset() < 0xc0)
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   init(targ);
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i)
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      reset(static_cast<DataFile>(i));
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bits[f].periodicMask32(lock, unlock);
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::intersect(DataFile f, const RegisterSet *set)
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bits[f] |= set->bits[f];
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::print() const
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO("GPR:");
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bits[FILE_GPR].print();
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO("\n");
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::assign(int32_t& reg, DataFile f, unsigned int size)
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   reg = bits[f].findFreeRange(size);
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (reg < 0)
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::occupy(const Value *v)
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return occupy(v->reg.file, v->reg.data.id, v->reg.size >> unit[v->reg.file]);
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask)
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (bits[f].testRange(reg, size))
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bits[f].setRange(reg, size);
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegisterSet::release(DataFile f, int32_t reg, unsigned int size)
186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bits[f].clrRange(reg, size);
188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size);
190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass RegAlloc
193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RegAlloc(Program *program) : prog(program), sequence(0) { }
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool exec();
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool execFunc();
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class PhiMovesPass : public Pass {
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   private:
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      virtual bool visit(BasicBlock *);
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   };
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class ArgumentMovesPass : public Pass {
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   private:
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      virtual bool visit(BasicBlock *);
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   };
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class BuildIntervalsPass : public Pass {
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   private:
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      virtual bool visit(BasicBlock *);
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void collectLiveValues(BasicBlock *);
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void addLiveRange(Value *, const BasicBlock *, int end);
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   };
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class InsertConstraintsPass : public Pass {
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   public:
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bool exec(Function *func);
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   private:
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      virtual bool visit(BasicBlock *);
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bool insertConstraintMoves();
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void condenseDefs(Instruction *);
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void condenseSrcs(Instruction *, const int first, const int last);
229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void addHazard(Instruction *i, const ValueRef *src);
231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void textureMask(TexInstruction *);
232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void addConstraint(Instruction *, int s, int n);
233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bool detectConflict(Instruction *, int s);
234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // target specific functions, TODO: put in subclass or Target
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void texConstraintNV50(TexInstruction *);
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void texConstraintNVC0(TexInstruction *);
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void texConstraintNVE0(TexInstruction *);
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      std::list<Instruction *> constrList;
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      const Target *targ;
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   };
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool buildLiveSets(BasicBlock *);
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Program *prog;
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Function *func;
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // instructions in control flow / chronological order
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ArrayList insns;
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int sequence; // for manual passes through CFG
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgtypedef std::pair<Value *, Value *> ValuePair;
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass SpillCodeInserter
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool run(const std::list<ValuePair>&);
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Symbol *assignSlot(const Interval&, unsigned int size);
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline int32_t getStackSize() const { return stackSize; }
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Function *func;
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct SpillSlot
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Interval occup;
275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      std::list<Value *> residents; // needed to recalculate occup
276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Symbol *sym;
277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int32_t offset;
278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inline uint8_t size() const { return sym->reg.size; }
279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   };
280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::list<SpillSlot> slots;
281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int32_t stackSize;
282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int32_t stackBase;
283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *unspill(Instruction *usei, LValue *, Value *slot);
285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void spill(Instruction *defi, Value *slot, LValue *);
286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                                           const BasicBlock *bb,
291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                                           int end)
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *insn = val->getUniqueInsn();
294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!insn)
296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn = bb->getFirst();
297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(bb->getFirst()->serial <= bb->getExit()->serial);
299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(bb->getExit()->serial + 1 >= end);
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int begin = insn->serial;
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      begin = bb->getEntry()->serial;
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            val->id, begin, insn->serial, end);
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (begin != end) // empty ranges are only added as hazards for fixed regs
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      val->livei.extend(begin, end);
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (b->cfg.incidentCount() <= 1)
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int n = 0;
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (ei.getType() == Graph::Edge::TREE ||
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          ei.getType() == Graph::Edge::FORWARD)
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         ++n;
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return (n == 2);
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// For each operand of each PHI in b, generate a new value by inserting a MOV
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// at the end of the block it is coming from and replace the operand with its
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// result. This eliminates liveness conflicts and enables us to let values be
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// copied to the right register if such a conflict exists nonetheless.
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// These MOVs are also crucial in making sure the live intervals of phi srces
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// are extended until the end of the loop, since they are not included in the
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// live-in sets.
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::PhiMovesPass::visit(BasicBlock *bb)
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *phi, *mov;
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   BasicBlock *pb, *pn;
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::stack<BasicBlock *> stack;
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      pb = BasicBlock::get(ei.getNode());
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(pb);
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (needNewElseBlock(bb, pb))
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         stack.push(pb);
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (!stack.empty()) {
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      pb = stack.top();
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      pn = new BasicBlock(func);
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      stack.pop();
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      pb->cfg.detach(&bb->cfg);
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD);
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(pb->getExit()->op != OP_CALL);
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (pb->getExit()->asFlow()->target.bb == bb)
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         pb->getExit()->asFlow()->target.bb = pn;
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // insert MOVs (phi->src(j) should stem from j-th in-BB)
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int j = 0;
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      pb = BasicBlock::get(ei.getNode());
366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!pb->isTerminated())
367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mov = new_Instruction(func, OP_MOV, TYPE_U32);
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mov->setSrc(0, phi->getSrc(j));
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         phi->setSrc(j, mov->getDef(0));
375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         pb->insertBefore(pb->getExit(), mov);
377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ++j;
379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // Bind function call inputs/outputs to the same physical register
388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // the callee uses, inserting moves as appropriate for the case a
389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // conflict arises.
390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Instruction *i = bb->getEntry(); i; i = i->next) {
391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      FlowInstruction *cal = i->asFlow();
392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!cal || cal->op != OP_CALL || cal->builtin)
393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      RegisterSet clobberSet(prog->getTarget());
395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // Bind input values.
397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int s = 0; cal->srcExists(s); ++s) {
398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp->reg.data.id = cal->target.fn->ins[s].rep()->reg.data.id;
400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Instruction *mov =
402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mov->setDef(0, tmp);
404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mov->setSrc(0, cal->getSrc(s));
405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         cal->setSrc(s, tmp);
406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->insertBefore(cal, mov);
408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // Bind output values.
411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int d = 0; cal->defExists(d); ++d) {
412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;
414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Instruction *mov =
416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mov->setSrc(0, tmp);
418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mov->setDef(0, cal->getDef(d));
419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         cal->setDef(d, tmp);
420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->insertAfter(cal, mov);
422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         clobberSet.occupy(tmp);
423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // Bind clobbered values.
426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           it != cal->target.fn->clobbers.end();
428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           ++it) {
429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (clobberSet.occupy(*it)) {
430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            Value *tmp = new_LValue(func, (*it)->asLValue());
431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            tmp->reg.data.id = (*it)->reg.data.id;
432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cal->setDef(cal->defCount(), tmp);
433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // Update the clobber set of the function.
438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (BasicBlock::get(func->cfgExit) == bb) {
439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      func->buildDefSets();
440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (bb->defSet.test(i))
442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            func->clobbers.push_back(func->getLValue(i));
443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Build the set of live-in variables of bb.
449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::buildLiveSets(BasicBlock *bb)
451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Function *f = bb->getFunction();
453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   BasicBlock *bn;
454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *i;
455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int s, d;
456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bb->liveSet.allocate(func->allLValues.getSize(), false);
460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int n = 0;
462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bn = BasicBlock::get(ei.getNode());
464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (bn == bb)
465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (bn->cfg.visit(sequence))
467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!buildLiveSets(bn))
468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            return false;
469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (n++ || bb->liveSet.marker)
470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->liveSet |= bn->liveSet;
471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      else
472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->liveSet = bn->liveSet;
473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!n && !bb->liveSet.marker)
475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bb->liveSet.fill(0);
476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bb->liveSet.marker = true;
477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO("BB:%i live set of out blocks:\n", bb->getId());
480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bb->liveSet.print();
481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // if (!bb->getEntry())
484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   //   return true;
485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (bb == BasicBlock::get(f->cfgExit)) {
487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (std::deque<ValueRef>::iterator it = f->outs.begin();
488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           it != f->outs.end(); ++it) {
489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         assert(it->get()->asLValue());
490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->liveSet.set(it->get()->id);
491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (d = 0; i->defExists(d); ++d)
496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->liveSet.clr(i->getDef(d)->id);
497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (s = 0; i->srcExists(s); ++s)
498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (i->getSrc(s)->asLValue())
499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            bb->liveSet.set(i->getSrc(s)->id);
500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bb->liveSet.clr(i->getDef(0)->id);
503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO("BB:%i live set after propagation:\n", bb->getId());
506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bb->liveSet.print();
507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   BasicBlock *bbA = NULL, *bbB = NULL;
516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (bb->cfg.outgoingCount()) {
518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // trickery to save a loop of OR'ing liveSets
519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // aliasing works fine with BitSet::setOr
520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (ei.getType() == Graph::Edge::DUMMY)
522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            continue;
523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (bbA) {
524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            bbA = bb;
526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else {
527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            bbA = bbB;
528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bbB = BasicBlock::get(ei.getNode());
530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else
533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (bb->cfg.incidentCount()) {
534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bb->liveSet.fill(0);
535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   collectLiveValues(bb);
542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // go through out blocks and delete phi sources that do not originate from
546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // the current block from the live set
547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      BasicBlock *out = BasicBlock::get(ei.getNode());
549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->liveSet.clr(i->getDef(0)->id);
552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (int s = 0; i->srcExists(s); ++s) {
554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(i->src(s).getInsn());
555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               bb->liveSet.set(i->getSrc(s)->id);
557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            else
558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               bb->liveSet.clr(i->getSrc(s)->id);
559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // remaining live-outs are live until end
564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (bb->getExit()) {
565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (bb->liveSet.test(j))
567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int d = 0; i->defExists(d); ++d) {
572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         bb->liveSet.clr(i->getDef(d)->id);
573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            i->getDef(d)->livei.extend(i->serial, i->serial);
575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int s = 0; i->srcExists(s); ++s) {
578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!i->getSrc(s)->asLValue())
579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            continue;
580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!bb->liveSet.test(i->getSrc(s)->id)) {
581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            bb->liveSet.set(i->getSrc(s)->id);
582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            addLiveRange(i->getSrc(s), bb, i->serial);
583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (bb == BasicBlock::get(func->cfg.getRoot())) {
588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (std::deque<ValueDef>::iterator it = func->ins.begin();
589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           it != func->ins.end(); ++it) {
590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            it->get()->livei.extend(0, 1);
592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define JOIN_MASK_PHI        (1 << 0)
600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define JOIN_MASK_UNION      (1 << 1)
601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define JOIN_MASK_MOV        (1 << 2)
602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define JOIN_MASK_TEX        (1 << 3)
603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass GCRA
605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GCRA(Function *, SpillCodeInserter&);
608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ~GCRA();
609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool allocateRegisters(ArrayList& insns);
611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void printNodeInfo() const;
613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class RIG_Node : public Graph::Node
616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   public:
618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      RIG_Node();
619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void init(const RegisterSet&, LValue *);
621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void addInterference(RIG_Node *);
623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void addRegPreference(RIG_Node *);
624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inline LValue *getValue() const
626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return reinterpret_cast<LValue *>(data);
628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inline void setValue(LValue *lval) { data = lval; }
630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inline uint8_t getCompMask() const
632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return ((1 << colors) - 1) << (reg & 7);
634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      static inline RIG_Node *get(const Graph::EdgeIterator& ei)
637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      {
638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return static_cast<RIG_Node *>(ei.getNode());
639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   public:
642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      uint32_t degree;
643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable
644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      uint16_t colors;
645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      DataFile f;
647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int32_t reg;
648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      float weight;
650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // list pointers for simplify() phase
652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      RIG_Node *next;
653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      RIG_Node *prev;
654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // union of the live intervals of all coalesced values (we want to retain
656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      //  the separate intervals for testing interference of compound values)
657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Interval livei;
658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      std::list<RIG_Node *> prefRegs;
660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   };
661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; }
664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void buildRIG(ArrayList&);
666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool coalesce(ArrayList&);
667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool doCoalesce(ArrayList&, unsigned int mask);
668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void calculateSpillWeights();
669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void simplify();
670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool selectRegisters();
671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void cleanup(const bool success);
672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void simplifyEdge(RIG_Node *, RIG_Node *);
674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void simplifyNode(RIG_Node *);
675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool coalesceValues(Value *, Value *, bool force);
677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void resolveSplitsAndMerges();
678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void makeCompound(Instruction *, bool isSplit);
679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&);
681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *);
683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void checkList(std::list<RIG_Node *>&);
684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::stack<uint32_t> stack;
687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // list headers for simplify() phase
689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RIG_Node lo[2];
690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RIG_Node hi;
691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Graph RIG;
693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RIG_Node *nodes;
694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int nodeCount;
695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Function *func;
697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Program *prog;
698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   static uint8_t relDegree[17][17];
700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RegisterSet regs;
702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // need to fixup register id for participants of OP_MERGE/SPLIT
704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::list<Instruction *> merges;
705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::list<Instruction *> splits;
706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   SpillCodeInserter& spill;
708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::list<ValuePair> mustSpill;
709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orguint8_t GCRA::relDegree[17][17];
712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   colors = 0;
716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::printNodeInfo() const
720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (unsigned int i = 0; i < nodeCount; ++i) {
722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!nodes[i].colors)
723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           i,
726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           nodes[i].f,nodes[i].reg,nodes[i].colors,
727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           nodes[i].weight,
728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           nodes[i].degree, nodes[i].degreeLimit);
729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next())
731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next())
733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO("\n");
735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   setValue(lval);
742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (lval->reg.data.id >= 0)
743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      lval->noSpill = lval->fixedReg = 1;
744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   colors = regs.units(lval->reg.file, lval->reg.size);
746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   f = lval->reg.file;
747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   reg = -1;
748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (lval->reg.data.id >= 0)
749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      reg = regs.idToUnits(lval);
750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   weight = std::numeric_limits<float>::infinity();
752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   degree = 0;
753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   degreeLimit = regs.getFileSize(f, lval->reg.size);
754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   livei.insert(lval->livei);
756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::coalesceValues(Value *dst, Value *src, bool force)
760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *rep = dst->join->asLValue();
762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *val = src->join->asLValue();
763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!force && val->reg.data.id >= 0) {
765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      rep = src->join->asLValue();
766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      val = dst->join->asLValue();
767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RIG_Node *nRep = &nodes[rep->id];
769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RIG_Node *nVal = &nodes[val->id];
770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (src->reg.file != dst->reg.file) {
772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!force)
773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return false;
774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      WARN("forced coalescing of values in different files !\n");
775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!force && dst->reg.size != src->reg.size)
777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) {
780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (force) {
781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (val->reg.data.id >= 0)
782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            WARN("forced coalescing of values in different fixed regs !\n");
783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (val->reg.data.id >= 0)
785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            return false;
786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         // make sure that there is no overlap with the fixed register of rep
787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (ArrayList::Iterator it = func->allLValues.iterator();
788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org              !it.end(); it.next()) {
789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            Value *reg = reinterpret_cast<Value *>(it.get())->asLValue();
790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(reg);
791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei))
792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               return false;
793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!force && nRep->livei.overlaps(nVal->livei))
798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            rep->id, rep->reg.data.id, val->id);
802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // set join pointer of all values joined with val
804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ++def)
806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      (*def)->get()->join = rep;
807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(rep->join == rep && val->join == rep);
808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // add val's definitions to rep and extend the live interval of its RIG node
810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   nRep->livei.unify(nVal->livei);
812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::coalesce(ArrayList& insns)
817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool ret = doCoalesce(insns, JOIN_MASK_PHI);
819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ret)
820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (func->getProgram()->getTarget()->getChipset() & ~0xf) {
822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 0x50:
823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 0x80:
824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 0x90:
825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 0xa0:
826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 0xc0:
829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 0xd0:
830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 0xe0:
831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ret = doCoalesce(insns, JOIN_MASK_UNION);
832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ret)
837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return doCoalesce(insns, JOIN_MASK_MOV);
839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic inline uint8_t makeCompMask(int compSize, int base, int size)
842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint8_t m = ((1 << size) - 1) << base;
844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (compSize) {
846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 1:
847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return 0xff;
848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 2:
849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      m |= (m << 2);
850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return (m << 4) | m;
851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 3:
852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case 4:
853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return (m << 4) | m;
854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   default:
855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(compSize <= 8);
856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return m;
857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic inline void copyCompound(Value *dst, Value *src)
861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *ldst = dst->asLValue();
863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *lsrc = src->asLValue();
864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ldst->compound = lsrc->compound;
866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ldst->compMask = lsrc->compMask;
867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::makeCompound(Instruction *insn, bool split)
871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();
873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO("makeCompound(split = %i): ", split);
876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn->print();
877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const unsigned int size = getNode(rep)->colors;
880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int base = 0;
881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!rep->compound)
883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      rep->compMask = 0xff;
884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   rep->compound = 1;
885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) {
887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue();
888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      val->compound = 1;
890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!val->compMask)
891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         val->compMask = 0xff;
892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      val->compMask &= makeCompMask(size, base, getNode(val)->colors);
893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(val->compMask);
894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n",
896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           rep->id, rep->compMask, val->id, val->compMask);
897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      base += getNode(val)->colors;
899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(base == size);
901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::doCoalesce(ArrayList& insns, unsigned int mask)
905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int c, n;
907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (n = 0; n < insns.getSize(); ++n) {
909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Instruction *i;
910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));
911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch (insn->op) {
913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_PHI:
914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!(mask & JOIN_MASK_PHI))
915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (c = 0; insn->srcExists(c); ++c)
917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) {
918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               // this is bad
919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               ERROR("failed to coalesce phi operands\n");
920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               return false;
921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            }
922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_UNION:
924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_MERGE:
925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!(mask & JOIN_MASK_UNION))
926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (c = 0; insn->srcExists(c); ++c)
928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            coalesceValues(insn->getDef(0), insn->getSrc(c), true);
929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (insn->op == OP_MERGE) {
930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            merges.push_back(insn);
931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (insn->srcExists(1))
932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               makeCompound(insn, false);
933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_SPLIT:
936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!(mask & JOIN_MASK_UNION))
937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         splits.push_back(insn);
939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (c = 0; insn->defExists(c); ++c)
940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            coalesceValues(insn->getSrc(0), insn->getDef(c), true);
941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         makeCompound(insn, true);
942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_MOV:
944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!(mask & JOIN_MASK_MOV))
945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         i = NULL;
947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!insn->getDef(0)->uses.empty())
948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            i = insn->getDef(0)->uses.front()->getInsn();
949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         // if this is a contraint-move there will only be a single use
950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (i && i->op == OP_MERGE) // do we really still need this ?
951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         i = insn->getSrc(0)->getUniqueInsn();
953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (i && !i->constrainedDefs()) {
954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (coalesceValues(insn->getDef(0), insn->getSrc(0), false))
955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               copyCompound(insn->getSrc(0), insn->getDef(0));
956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TEX:
959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TXB:
960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TXL:
961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TXF:
962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TXQ:
963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TXD:
964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TXG:
965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case OP_TEXCSAA:
966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!(mask & JOIN_MASK_TEX))
967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            coalesceValues(insn->getDef(c), insn->getSrc(c), true);
970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::RIG_Node::addInterference(RIG_Node *node)
980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   this->degree += relDegree[node->colors][colors];
982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   node->degree += relDegree[colors][node->colors];
983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   this->attach(node, Graph::Edge::CROSS);
985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::RIG_Node::addRegPreference(RIG_Node *node)
989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   prefRegs.push_back(node);
991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   func(fn),
995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   regs(fn->getProgram()->getTarget()),
996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   spill(spill)
997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   prog = func->getProgram();
999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // initialize relative degrees array - i takes away from j
1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 1; i <= 16; ++i)
1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int j = 1; j <= 16; ++j)
1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         relDegree[i][j] = j * ((i + j - 1) / j);
1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::~GCRA()
1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (nodes)
1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      delete[] nodes;
1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::checkList(std::list<RIG_Node *>& lst)
1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GCRA::RIG_Node *prev = NULL;
1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (std::list<RIG_Node *>::iterator it = lst.begin();
1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        it != lst.end();
1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ++it) {
1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert((*it)->getValue()->join == (*it)->getValue());
1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (prev)
1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         assert(prev->livei.begin() <= (*it)->livei.begin());
1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      prev = *it;
1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node)
1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (node->livei.isEmpty())
1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // only the intervals of joined values don't necessarily arrive in order
1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::list<RIG_Node *>::iterator prev, it;
1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (it = list.end(); it != list.begin(); it = prev) {
1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      prev = it;
1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      --prev;
1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if ((*prev)->livei.begin() <= node->livei.begin())
1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   list.insert(it, node);
1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::buildRIG(ArrayList& insns)
1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::list<RIG_Node *> values, active;
1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (std::deque<ValueDef>::iterator it = func->ins.begin();
1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        it != func->ins.end(); ++it)
1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insertOrderedTail(values, getNode(it->get()->asLValue()));
1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < insns.getSize(); ++i) {
1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i));
1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int d = 0; insn->defExists(d); ++d)
1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (insn->getDef(d)->rep() == insn->getDef(d))
1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            insertOrderedTail(values, getNode(insn->getDef(d)->asLValue()));
1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   checkList(values);
1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (!values.empty()) {
1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      RIG_Node *cur = values.front();
1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (std::list<RIG_Node *>::iterator it = active.begin();
1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           it != active.end();
1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           ++it) {
1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         RIG_Node *node = *it;
1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (node->livei.end() <= cur->livei.begin()) {
1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            it = active.erase(it);
1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            --it;
1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else
1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (node->f == cur->f && node->livei.overlaps(cur->livei)) {
1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cur->addInterference(node);
1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      values.pop_front();
1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      active.push_back(cur);
1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::calculateSpillWeights()
1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (unsigned int i = 0; i < nodeCount; ++i) {
1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      RIG_Node *const n = &nodes[i];
1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!nodes[i].colors || nodes[i].livei.isEmpty())
1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (nodes[i].reg >= 0) {
1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         // update max reg
1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         regs.occupy(n->f, n->reg, n->colors);
1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LValue *val = nodes[i].getValue();
1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!val->noSpill) {
1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         int rc = 0;
1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (Value::DefIterator it = val->defs.begin();
1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org              it != val->defs.end();
1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org              ++it)
1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            rc += (*it)->get()->refCount();
1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nodes[i].weight =
1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            (float)rc * (float)rc / (float)nodes[i].livei.extent();
1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (nodes[i].degree < nodes[i].degreeLimit) {
1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         int l = 0;
1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (val->reg.size > 4)
1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            l = 1;
1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         DLLIST_ADDHEAD(&lo[l], &nodes[i]);
1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         DLLIST_ADDHEAD(&hi, &nodes[i]);
1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      printNodeInfo();
1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool move = b->degree >= b->degreeLimit;
1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC,
1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            a->getValue()->id, a->degree, a->degreeLimit,
1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            b->getValue()->id, b->degree, b->degreeLimit);
1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   b->degree -= relDegree[a->colors][b->colors];
1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   move = move && b->degree < b->degreeLimit;
1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (move && !DLLIST_EMPTY(b)) {
1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int l = (b->getValue()->reg.size > 4) ? 1 : 0;
1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      DLLIST_DEL(b);
1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      DLLIST_ADDTAIL(&lo[l], b);
1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::simplifyNode(RIG_Node *node)
1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      simplifyEdge(node, RIG_Node::get(ei));
1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      simplifyEdge(node, RIG_Node::get(ei));
1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   DLLIST_DEL(node);
1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   stack.push(node->getValue()->id);
1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            node->getValue()->id,
1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            (node->degree < node->degreeLimit) ? "" : "(spill)");
1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::simplify()
1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (;;) {
1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!DLLIST_EMPTY(&lo[0])) {
1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         do {
1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            simplifyNode(lo[0].next);
1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } while (!DLLIST_EMPTY(&lo[0]));
1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else
1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!DLLIST_EMPTY(&lo[1])) {
1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         simplifyNode(lo[1].next);
1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else
1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!DLLIST_EMPTY(&hi)) {
1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         RIG_Node *best = hi.next;
1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         float bestScore = best->weight / (float)best->degree;
1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         // spill candidate
1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (RIG_Node *it = best->next; it != &hi; it = it->next) {
1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            float score = it->weight / (float)it->degree;
1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (score < bestScore) {
1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               best = it;
1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               bestScore = score;
1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            }
1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (isinf(bestScore)) {
1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            ERROR("no viable spill candidates left\n");
1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         simplifyNode(best);
1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const RIG_Node *intf = RIG_Node::get(ei);
1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (intf->reg < 0)
1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const LValue *vA = node->getValue();
1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const LValue *vB = intf->getValue();
1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);
1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (vA->compound | vB->compound) {
1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // NOTE: this only works for >aligned< register tuples !
1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) {
1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) {
1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         const LValue *vD = (*D)->get()->asLValue();
1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         const LValue *vd = (*d)->get()->asLValue();
1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (!vD->livei.overlaps(vd->livei)) {
1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                     vD->id, vd->id);
1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            continue;
1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         uint8_t mask = vD->compound ? vD->compMask : ~0;
1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (vd->compound) {
1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(vB->compound);
1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mask &= vd->compMask & vB->compMask;
1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else {
1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mask &= intfMask;
1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         INFO_DBG(prog->dbgFlags, REG_ALLOC,
1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  vD->id,
1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  vD->compound ? vD->compMask : 0xff,
1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  vd->id,
1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  vd->compound ? vd->compMask : intfMask,
1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  vB->compMask, intf->reg & ~7, mask);
1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (mask)
1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            regs.occupyMask(node->f, intf->reg & ~7, mask);
1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else {
1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO_DBG(prog->dbgFlags, REG_ALLOC,
1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               "(%%%i) X (%%%i): $r%i + %u\n",
1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               vA->id, vB->id, intf->reg, intf->colors);
1238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs.occupy(node->f, intf->reg, intf->colors);
1239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::selectRegisters()
1244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");
1246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (!stack.empty()) {
1248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      RIG_Node *node = &nodes[stack.top()];
1249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      stack.pop();
1250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs.reset(node->f);
1252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
1254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               node->getValue()->id, node->colors);
1255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
1257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         checkInterference(node, ei);
1258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
1259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         checkInterference(node, ei);
1260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!node->prefRegs.empty()) {
1262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
1263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org              it != node->prefRegs.end();
1264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org              ++it) {
1265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if ((*it)->reg >= 0 &&
1266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                regs.occupy(node->f, (*it)->reg, node->colors)) {
1267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               node->reg = (*it)->reg;
1268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               break;
1269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            }
1270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (node->reg >= 0)
1273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
1274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LValue *lval = node->getValue();
1275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         regs.print();
1277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bool ret = regs.assign(node->reg, node->f, node->colors);
1278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (ret) {
1279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
1280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         lval->compMask = node->getCompMask();
1281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
1282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
1283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  lval->id, lval->reg.size);
1284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Symbol *slot = NULL;
1285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (lval->reg.file == FILE_GPR)
1286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            slot = spill.assignSlot(node->livei, lval->reg.size);
1287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mustSpill.push_back(ValuePair(lval, slot));
1288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!mustSpill.empty())
1291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
1292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (unsigned int i = 0; i < nodeCount; ++i) {
1293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LValue *lval = nodes[i].getValue();
1294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (nodes[i].reg >= 0 && nodes[i].colors > 0)
1295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         lval->reg.data.id =
1296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
1297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
1299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::allocateRegisters(ArrayList& insns)
1303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool ret;
1305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC,
1307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            "allocateRegisters to %u instructions\n", insns.getSize());
1308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   nodeCount = func->allLValues.getSize();
1310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   nodes = new RIG_Node[nodeCount];
1311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!nodes)
1312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
1313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (unsigned int i = 0; i < nodeCount; ++i) {
1314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i));
1315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (lval) {
1316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         nodes[i].init(regs, lval);
1317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         RIG.insert(&nodes[i]);
1318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // coalesce first, we use only 1 RIG node for a group of joined values
1322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ret = coalesce(insns);
1323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ret)
1324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      goto out;
1325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      func->printLiveIntervals();
1328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   buildRIG(insns);
1330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   calculateSpillWeights();
1331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   simplify();
1332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ret = selectRegisters();
1334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ret) {
1335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      INFO_DBG(prog->dbgFlags, REG_ALLOC,
1336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               "selectRegisters failed, inserting spill code ...\n");
1337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs.reset(FILE_GPR, true);
1338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      spill.run(mustSpill);
1339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         func->print();
1341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else {
1342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      prog->maxGPR = regs.getMaxAssigned(FILE_GPR);
1343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgout:
1346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   cleanup(ret);
1347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return ret;
1348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::cleanup(const bool success)
1352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   mustSpill.clear();
1354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (ArrayList::Iterator it = func->allLValues.iterator();
1356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        !it.end(); it.next()) {
1357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LValue *lval =  reinterpret_cast<LValue *>(it.get());
1358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      lval->livei.clear();
1360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      lval->compound = 0;
1362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      lval->compMask = 0;
1363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (lval->join == lval)
1365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
1366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (success) {
1368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         lval->reg.data.id = lval->join->reg.data.id;
1369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
1370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
1371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org              ++d)
1372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            lval->join->defs.remove(*d);
1373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         lval->join = lval;
1374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (success)
1378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      resolveSplitsAndMerges();
1379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   splits.clear(); // avoid duplicate entries on next coalesce pass
1380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   merges.clear();
1381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   delete[] nodes;
1383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   nodes = NULL;
1384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSymbol *
1387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSpillCodeInserter::assignSlot(const Interval &livei, unsigned int size)
1388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   SpillSlot slot;
1390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int32_t offsetBase = stackSize;
1391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int32_t offset;
1392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();
1393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (offsetBase % size)
1395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      offsetBase += size - (offsetBase % size);
1396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   slot.sym = NULL;
1398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (offset = offsetBase; offset < stackSize; offset += size) {
1400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      while (it != slots.end() && it->offset < offset)
1401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         ++it;
1402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (it == slots.end()) // no slots left
1403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      std::list<SpillSlot>::iterator bgn = it;
1405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      while (it != slots.end() && it->offset < (offset + size)) {
1407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         it->occup.print();
1408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (it->occup.overlaps(livei))
1409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         ++it;
1411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (it == slots.end() || it->offset >= (offset + size)) {
1413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         // fits
1414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (; bgn != slots.end() && bgn->offset < (offset + size); ++bgn) {
1415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            bgn->occup.insert(livei);
1416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (bgn->size() == size)
1417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               slot.sym = bgn->sym;
1418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!slot.sym) {
1423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      stackSize = offset + size;
1424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      slot.offset = offset;
1425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
1426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!func->stackPtr)
1427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         offset += func->tlsBase;
1428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      slot.sym->setAddress(NULL, offset);
1429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      slot.sym->reg.size = size;
1430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      slots.insert(pos, slot)->occup.insert(livei);
1431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return slot.sym;
1433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
1437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const DataType ty = typeOfSize(slot->reg.size);
1439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *st;
1441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (slot->reg.file == FILE_MEMORY_LOCAL) {
1442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      st = new_Instruction(func, OP_STORE, ty);
1443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      st->setSrc(0, slot);
1444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      st->setSrc(1, lval);
1445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      lval->noSpill = 1;
1446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else {
1447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      st = new_Instruction(func, OP_CVT, ty);
1448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      st->setDef(0, slot);
1449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      st->setSrc(0, lval);
1450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   defi->bb->insertAfter(defi, st);
1452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLValue *
1455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
1456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const DataType ty = typeOfSize(slot->reg.size);
1458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   lval = cloneShallow(func, lval);
1460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *ld;
1462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (slot->reg.file == FILE_MEMORY_LOCAL) {
1463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      lval->noSpill = 1;
1464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ld = new_Instruction(func, OP_LOAD, ty);
1465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else {
1466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ld = new_Instruction(func, OP_CVT, ty);
1467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ld->setDef(0, lval);
1469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ld->setSrc(0, slot);
1470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   usei->bb->insertBefore(usei, ld);
1472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return lval;
1473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSpillCodeInserter::run(const std::list<ValuePair>& lst)
1477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
1479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ++it) {
1480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      LValue *lval = it->first->asLValue();
1481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Symbol *mem = it->second ? it->second->asSym() : NULL;
1482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
1484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           ++d) {
1485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Value *slot = mem ?
1486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
1487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Value *tmp = NULL;
1488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Instruction *last = NULL;
1489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         LValue *dval = (*d)->get()->asLValue();
1491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Instruction *defi = (*d)->getInsn();
1492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         // handle uses first or they'll contain the spill stores
1494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         while (!dval->uses.empty()) {
1495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            ValueRef *u = dval->uses.front();
1496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            Instruction *usei = u->getInsn();
1497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(usei);
1498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (usei->op == OP_PHI) {
1499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
1500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               last = NULL;
1501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            } else
1502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (!last || usei != last->next) { // TODO: sort uses
1503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               tmp = unspill(usei, dval, slot);
1504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               last = usei;
1505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            }
1506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            u->set(tmp);
1507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         assert(defi);
1510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (defi->op == OP_PHI) {
1511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            d = lval->defs.erase(d);
1512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            --d;
1513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (slot->reg.file == FILE_MEMORY_LOCAL)
1514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               delete_Instruction(func->getProgram(), defi);
1515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            else
1516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               defi->setDef(0, slot);
1517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         } else {
1518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            spill(defi, slot, dval);
1519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // TODO: We're not trying to reuse old slots in a potential next iteration.
1525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   //  We have to update the slots' livei intervals to be able to do that.
1526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   stackBase = stackSize;
1527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   slots.clear();
1528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
1529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::exec()
1533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (IteratorRef it = prog->calls.iteratorDFS(false);
1535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        !it->end(); it->next()) {
1536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
1537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      func->tlsBase = prog->tlsSize;
1539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!execFunc())
1540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return false;
1541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      prog->tlsSize += func->tlsSize;
1542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
1544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::execFunc()
1548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   InsertConstraintsPass insertConstr;
1550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   PhiMovesPass insertPhiMoves;
1551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ArgumentMovesPass insertArgMoves;
1552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   BuildIntervalsPass buildIntervals;
1553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   SpillCodeInserter insertSpills(func);
1554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GCRA gcra(func, insertSpills);
1556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int i, retries;
1558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool ret;
1559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ret = insertConstr.exec(func);
1561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ret)
1562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      goto out;
1563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ret = insertPhiMoves.run(func);
1565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ret)
1566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      goto out;
1567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ret = insertArgMoves.run(func);
1569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ret)
1570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      goto out;
1571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // TODO: need to fix up spill slot usage ranges to support > 1 retry
1573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (retries = 0; retries < 3; ++retries) {
1574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
1575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         INFO("Retry: %i\n", retries);
1576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         func->print();
1578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // spilling to registers may add live ranges, need to rebuild everything
1580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ret = true;
1581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (sequence = func->cfg.nextSequence(), i = 0;
1582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           ret && i <= func->loopNestingBound;
1583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org           sequence = func->cfg.nextSequence(), ++i)
1584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
1585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!ret)
1586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      func->orderInstructions(this->insns);
1588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ret = buildIntervals.run(func);
1590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!ret)
1591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ret = gcra.allocateRegisters(insns);
1593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (ret)
1594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break; // success
1595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);
1597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   func->tlsSize = insertSpills.getStackSize();
1599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgout:
1600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return ret;
1601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// TODO: check if modifying Instruction::join here breaks anything
1604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGCRA::resolveSplitsAndMerges()
1606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (std::list<Instruction *>::iterator it = splits.begin();
1608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        it != splits.end();
1609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ++it) {
1610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Instruction *split = *it;
1611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned int reg = regs.idToBytes(split->getSrc(0));
1612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int d = 0; split->defExists(d); ++d) {
1613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Value *v = split->getDef(d);
1614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         v->reg.data.id = regs.bytesToId(v, reg);
1615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         v->join = v;
1616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         reg += v->reg.size;
1617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   splits.clear();
1620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (std::list<Instruction *>::iterator it = merges.begin();
1622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        it != merges.end();
1623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ++it) {
1624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Instruction *merge = *it;
1625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned int reg = regs.idToBytes(merge->getDef(0));
1626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int s = 0; merge->srcExists(s); ++s) {
1627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         Value *v = merge->getSrc(s);
1628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         v->reg.data.id = regs.bytesToId(v, reg);
1629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         v->join = v;
1630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         reg += v->reg.size;
1631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   merges.clear();
1634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool Program::registerAllocation()
1637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   RegAlloc ra(this);
1639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return ra.exec();
1640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::exec(Function *ir)
1644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   constrList.clear();
1646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool ret = run(ir, true, true);
1648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (ret)
1649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ret = insertConstraintMoves();
1650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return ret;
1651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// TODO: make part of texture insn
1654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
1656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Value *def[4];
1658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int c, k, d;
1659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint8_t mask = 0;
1660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (d = 0, k = 0, c = 0; c < 4; ++c) {
1662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!(tex->tex.mask & (1 << c)))
1663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         continue;
1664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (tex->getDef(k)->refCount()) {
1665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         mask |= 1 << c;
1666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         def[d++] = tex->getDef(k);
1667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ++k;
1669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   tex->tex.mask = mask;
1671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (c = 0; c < d; ++c)
1673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tex->setDef(c, def[c]);
1674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (; c < 4; ++c)
1675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tex->setDef(c, NULL);
1676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
1680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Value *v = cst->getSrc(s);
1682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // current register allocation can't handle it if a value participates in
1684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // multiple constraints
1685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
1686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (cst != (*it)->getInsn())
1687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return true;
1688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // can start at s + 1 because detectConflict is called on all sources
1691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int c = s + 1; cst->srcExists(c); ++c)
1692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (v == cst->getSrc(c))
1693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return true;
1694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *defi = v->getInsn();
1696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return (!defi || defi->constrainedDefs());
1698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
1702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *cst;
1704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int d;
1705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // first, look for an existing identical constraint op
1707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (std::list<Instruction *>::iterator it = constrList.begin();
1708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        it != constrList.end();
1709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ++it) {
1710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      cst = (*it);
1711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!i->bb->dominatedBy(cst->bb))
1712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         break;
1713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (d = 0; d < n; ++d)
1714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (cst->getSrc(d) != i->getSrc(d + s))
1715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (d >= n) {
1717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (d = 0; d < n; ++d, ++s)
1718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            i->setSrc(s, cst->getDef(d));
1719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         return;
1720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
1723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (d = 0; d < n; ++s, ++d) {
1725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      cst->setDef(d, new_LValue(func, FILE_GPR));
1726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      cst->setSrc(d, i->getSrc(s));
1727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      i->setSrc(s, cst->getDef(d));
1728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   i->bb->insertBefore(i, cst);
1730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   constrList.push_back(cst);
1732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Add a dummy use of the pointer source of >= 8 byte loads after the load
1735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// to prevent it from being assigned a register which overlapping the load's
1736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// destination, which would produce random corruptions.
1737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
1739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
1741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hzd->setSrc(0, src->get());
1742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   i->bb->insertAfter(i, hzd);
1743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
1747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
1749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint8_t size = 0;
1751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int n;
1752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n)
1753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      size += insn->getDef(n)->reg.size;
1754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (n < 2)
1755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
1756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *lval = new_LValue(func, FILE_GPR);
1757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   lval->reg.size = size;
1758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size));
1760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   split->setSrc(0, lval);
1761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int d = 0; d < n; ++d) {
1762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      split->setDef(d, insn->getDef(d));
1763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn->setDef(d, NULL);
1764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   insn->setDef(0, lval);
1766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int k = 1, d = n; insn->defExists(d); ++d, ++k) {
1768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn->setDef(k, insn->getDef(d));
1769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn->setDef(d, NULL);
1770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // carry over predicate if any (mainly for OP_UNION uses)
1772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   split->setPredicate(insn->cc, insn->getPredicate());
1773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   insn->bb->insertAfter(insn, split);
1775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   constrList.push_back(split);
1776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
1779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                                              const int a, const int b)
1780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   uint8_t size = 0;
1782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (a >= b)
1783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
1784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int s = a; s <= b; ++s)
1785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      size += insn->getSrc(s)->reg.size;
1786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!size)
1787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
1788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   LValue *lval = new_LValue(func, FILE_GPR);
1789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   lval->reg.size = size;
1790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Value *save[3];
1792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   insn->takeExtraSources(0, save);
1793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size));
1795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   merge->setDef(0, lval);
1796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int s = a, i = 0; s <= b; ++s, ++i) {
1797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      merge->setSrc(i, insn->getSrc(s));
1798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn->setSrc(s, NULL);
1799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   insn->setSrc(a, lval);
1801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) {
1803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn->setSrc(k, insn->getSrc(s));
1804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      insn->setSrc(s, NULL);
1805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   insn->bb->insertBefore(insn, merge);
1807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   insn->putExtraSources(0, save);
1809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   constrList.push_back(merge);
1811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
1815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   textureMask(tex);
1817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   condenseDefs(tex);
1818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int n = tex->srcCount(0xff, true);
1820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (n > 4) {
1821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      condenseSrcs(tex, 0, 3);
1822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (n > 5) // NOTE: first call modified positions already
1823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
1824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else
1825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (n > 1) {
1826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      condenseSrcs(tex, 0, n - 1);
1827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
1832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int n, s;
1834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   textureMask(tex);
1836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (tex->op == OP_TXQ) {
1838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      s = tex->srcCount(0xff);
1839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      n = 0;
1840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else {
1841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      s = tex->tex.target.getArgCount();
1842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!tex->tex.target.isArray() &&
1843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
1844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         ++s;
1845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (tex->op == OP_TXD && tex->tex.useOffsets)
1846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         ++s;
1847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      n = tex->srcCount(0xff) - s;
1848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assert(n <= 4);
1849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (s > 1)
1852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      condenseSrcs(tex, 0, s - 1);
1853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (n > 1) // NOTE: first call modified positions already
1854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      condenseSrcs(tex, 1, n);
1855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   condenseDefs(tex);
1857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
1860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
1861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Value *pred = tex->getPredicate();
1863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (pred)
1864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tex->setPredicate(tex->cc, NULL);
1865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   textureMask(tex);
1867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(tex->defExists(0) && tex->srcExists(0));
1869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   // make src and def count match
1870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int c;
1871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
1872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!tex->srcExists(c))
1873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
1874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!tex->defExists(c))
1875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
1876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (pred)
1878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      tex->setPredicate(tex->cc, pred);
1879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   condenseDefs(tex);
1880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   condenseSrcs(tex, 0, c - 1);
1881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Insert constraint markers for instructions whose multiple sources must be
1884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// located in consecutive registers.
1885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
1887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   TexInstruction *tex;
1889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   Instruction *next;
1890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int s, size;
1891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   targ = bb->getProgram()->getTarget();
1893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (Instruction *i = bb->getEntry(); i; i = next) {
1895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      next = i->next;
1896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if ((tex = i->asTex())) {
1898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         switch (targ->getChipset() & ~0xf) {
1899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case 0x50:
1900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case 0x80:
1901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case 0x90:
1902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case 0xa0:
1903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            texConstraintNV50(tex);
1904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case 0xc0:
1906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case 0xd0:
1907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            texConstraintNVC0(tex);
1908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         case 0xe0:
1910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            texConstraintNVE0(tex);
1911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         default:
1913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
1914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else
1916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (i->op == OP_EXPORT || i->op == OP_STORE) {
1917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
1918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(i->srcExists(s));
1919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            size -= i->getSrc(s)->reg.size;
1920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         condenseSrcs(i, 1, s - 1);
1922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else
1923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (i->op == OP_LOAD || i->op == OP_VFETCH) {
1924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         condenseDefs(i);
1925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
1926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            addHazard(i, i->src(0).getIndirect(0));
1927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else
1928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (i->op == OP_UNION) {
1929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         constrList.push_back(i);
1930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
1933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Insert extra moves so that, if multiple register constraints on a value are
1936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// in conflict, these conflicts can be resolved.
1937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
1938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgRegAlloc::InsertConstraintsPass::insertConstraintMoves()
1939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (std::list<Instruction *>::iterator it = constrList.begin();
1941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        it != constrList.end();
1942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        ++it) {
1943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Instruction *cst = *it;
1944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      Instruction *mov;
1945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (cst->op == OP_SPLIT && 0) {
1947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         // spilling splits is annoying, just make sure they're separate
1948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (int d = 0; cst->defExists(d); ++d) {
1949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (!cst->getDef(d)->refCount())
1950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               continue;
1951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            LValue *lval = new_LValue(func, cst->def(d).getFile());
1952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            const uint8_t size = cst->def(d).getSize();
1953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            lval->reg.size = size;
1954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
1956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mov->setSrc(0, lval);
1957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mov->setDef(0, cst->getDef(d));
1958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cst->setDef(d, mov->getSrc(0));
1959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cst->bb->insertAfter(cst, mov);
1960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cst->getSrc(0)->asLValue()->noSpill = 1;
1962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mov->getSrc(0)->asLValue()->noSpill = 1;
1963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else
1965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (cst->op == OP_MERGE || cst->op == OP_UNION) {
1966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (int s = 0; cst->srcExists(s); ++s) {
1967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            const uint8_t size = cst->src(s).getSize();
1968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (!cst->getSrc(s)->defs.size()) {
1970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               mov = new_Instruction(func, OP_NOP, typeOfSize(size));
1971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               mov->setDef(0, cst->getSrc(s));
1972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               cst->bb->insertBefore(cst, mov);
1973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               continue;
1974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            }
1975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(cst->getSrc(s)->defs.size() == 1); // still SSA
1976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
1978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            // catch some cases where don't really need MOVs
1979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
1980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               continue;
1981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            LValue *lval = new_LValue(func, cst->src(s).getFile());
1983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            lval->reg.size = size;
1984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
1986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mov->setDef(0, lval);
1987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            mov->setSrc(0, cst->getSrc(s));
1988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cst->setSrc(s, mov->getDef(0));
1989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cst->bb->insertBefore(cst, mov);
1990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help
1992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (cst->op == OP_UNION)
1994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               mov->setPredicate(defi->cc, defi->getPredicate());
1995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
1996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
1997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
1998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
2000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
2001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
2002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // namespace nv50_ir
2003