1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "nv50_ir.h"
24#include "nv50_ir_target.h"
25
26#include <stack>
27#include <limits>
28
29namespace nv50_ir {
30
31#define MAX_REGISTER_FILE_SIZE 256
32
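// RegisterSet tracks which slots of each register file are currently occupied.
// Occupancy is kept as one BitSet per file, indexed in allocation units (see
// unit[] below, the log2 of the file's granularity in bytes), so a value
// covers reg.size >> unit[f] consecutive bits.
//
// Minimal usage sketch (illustration only, not part of the allocator; the
// actual unit sizes depend on the target):
//
//   RegisterSet rs(targ);
//   rs.occupy(FILE_GPR, 4, 2);          // claim two consecutive GPR units
//   int32_t reg;
//   if (rs.assign(reg, FILE_GPR, 2))    // find a free range of two units
//      rs.release(FILE_GPR, reg, 2);    // and give it back again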
33class RegisterSet
34{
35public:
36   RegisterSet(const Target *);
37
38   void init(const Target *);
39   void reset(DataFile, bool resetMax = false);
40
41   void periodicMask(DataFile f, uint32_t lock, uint32_t unlock);
42   void intersect(DataFile f, const RegisterSet *);
43
44   bool assign(int32_t& reg, DataFile f, unsigned int size);
45   void release(DataFile f, int32_t reg, unsigned int size);
46   bool occupy(DataFile f, int32_t reg, unsigned int size);
47   bool occupy(const Value *);
48   void occupyMask(DataFile f, int32_t reg, uint8_t mask);
49
50   inline int getMaxAssigned(DataFile f) const { return fill[f]; }
51
52   inline unsigned int getFileSize(DataFile f, uint8_t regSize) const
53   {
54      if (restrictedGPR16Range && f == FILE_GPR && regSize == 2)
55         return (last[f] + 1) / 2;
56      return last[f] + 1;
57   }
58
59   inline unsigned int units(DataFile f, unsigned int size) const
60   {
61      return size >> unit[f];
62   }
63   // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary)
64   inline unsigned int idToBytes(Value *v) const
65   {
66      return v->reg.data.id * MIN2(v->reg.size, 4);
67   }
68   inline unsigned int idToUnits(Value *v) const
69   {
70      return units(v->reg.file, idToBytes(v));
71   }
72   inline int bytesToId(Value *v, unsigned int bytes) const
73   {
74      if (v->reg.size < 4)
75         return units(v->reg.file, bytes);
76      return bytes / 4;
77   }
78   inline int unitsToId(DataFile f, int u, uint8_t size) const
79   {
80      if (u < 0)
81         return -1;
82      return (size < 4) ? u : ((u << unit[f]) / 4);
83   }
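
   // Worked example for the conversions above (assuming, for illustration,
   // a file with unit[f] == 2, i.e. 4-byte granularity): a 64-bit value needs
   // units(f, 8) == 2 units, and with reg.data.id == 3 it starts at
   // idToBytes() == 3 * 4 == 12 bytes, i.e. idToUnits() == 3. Values smaller
   // than 4 bytes are the exception, see the note above idToBytes().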
84
85   void print() const;
86
87private:
88   BitSet bits[LAST_REGISTER_FILE + 1];
89
90   int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity
91
92   int last[LAST_REGISTER_FILE + 1];
93   int fill[LAST_REGISTER_FILE + 1];
94
95   const bool restrictedGPR16Range;
96};
97
98void
99RegisterSet::reset(DataFile f, bool resetMax)
100{
101   bits[f].fill(0);
102   if (resetMax)
103      fill[f] = -1;
104}
105
106void
107RegisterSet::init(const Target *targ)
108{
109   for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) {
110      DataFile f = static_cast<DataFile>(rf);
111      last[rf] = targ->getFileSize(f) - 1;
112      unit[rf] = targ->getFileUnit(f);
113      fill[rf] = -1;
114      assert(last[rf] < MAX_REGISTER_FILE_SIZE);
115      bits[rf].allocate(last[rf] + 1, true);
116   }
117}
118
119RegisterSet::RegisterSet(const Target *targ)
120  : restrictedGPR16Range(targ->getChipset() < 0xc0)
121{
122   init(targ);
123   for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i)
124      reset(static_cast<DataFile>(i));
125}
126
127void
128RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock)
129{
130   bits[f].periodicMask32(lock, unlock);
131}
132
133void
134RegisterSet::intersect(DataFile f, const RegisterSet *set)
135{
136   bits[f] |= set->bits[f];
137}
138
139void
140RegisterSet::print() const
141{
142   INFO("GPR:");
143   bits[FILE_GPR].print();
144   INFO("\n");
145}
146
147bool
148RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size)
149{
150   reg = bits[f].findFreeRange(size);
151   if (reg < 0)
152      return false;
153   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
154   return true;
155}
156
157bool
158RegisterSet::occupy(const Value *v)
159{
160   return occupy(v->reg.file, v->reg.data.id, v->reg.size >> unit[v->reg.file]);
161}
162
163void
164RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask)
165{
166   bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32));
167}
168
169bool
170RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size)
171{
172   if (bits[f].testRange(reg, size))
173      return false;
174
175   bits[f].setRange(reg, size);
176
177   INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size);
178
179   fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1));
180
181   return true;
182}
183
184void
185RegisterSet::release(DataFile f, int32_t reg, unsigned int size)
186{
187   bits[f].clrRange(reg, size);
188
189   INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size);
190}
191
192class RegAlloc
193{
194public:
195   RegAlloc(Program *program) : prog(program), sequence(0) { }
196
197   bool exec();
198   bool execFunc();
199
200private:
201   class PhiMovesPass : public Pass {
202   private:
203      virtual bool visit(BasicBlock *);
204      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
205   };
206
207   class ArgumentMovesPass : public Pass {
208   private:
209      virtual bool visit(BasicBlock *);
210   };
211
212   class BuildIntervalsPass : public Pass {
213   private:
214      virtual bool visit(BasicBlock *);
215      void collectLiveValues(BasicBlock *);
216      void addLiveRange(Value *, const BasicBlock *, int end);
217   };
218
219   class InsertConstraintsPass : public Pass {
220   public:
221      bool exec(Function *func);
222   private:
223      virtual bool visit(BasicBlock *);
224
225      bool insertConstraintMoves();
226
227      void condenseDefs(Instruction *);
228      void condenseSrcs(Instruction *, const int first, const int last);
229
230      void addHazard(Instruction *i, const ValueRef *src);
231      void textureMask(TexInstruction *);
232      void addConstraint(Instruction *, int s, int n);
233      bool detectConflict(Instruction *, int s);
234
235      // target specific functions, TODO: put in subclass or Target
236      void texConstraintNV50(TexInstruction *);
237      void texConstraintNVC0(TexInstruction *);
238      void texConstraintNVE0(TexInstruction *);
239
240      std::list<Instruction *> constrList;
241
242      const Target *targ;
243   };
244
245   bool buildLiveSets(BasicBlock *);
246
247private:
248   Program *prog;
249   Function *func;
250
251   // instructions in control flow / chronological order
252   ArrayList insns;
253
254   int sequence; // for manual passes through CFG
255};
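
// Rough flow of register allocation (see RegAlloc::execFunc below):
//  1. InsertConstraintsPass groups defs/sources that must end up in
//     consecutive registers.
//  2. PhiMovesPass and ArgumentMovesPass insert copies for phis and calls.
//  3. Live sets and live intervals are built.
//  4. GCRA coalesces values, builds the interference graph and colours it.
//  5. If colouring fails, SpillCodeInserter spills the selected values and
//     steps 3-5 are retried on the modified program.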
256
257typedef std::pair<Value *, Value *> ValuePair;
258
259class SpillCodeInserter
260{
261public:
262   SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { }
263
264   bool run(const std::list<ValuePair>&);
265
266   Symbol *assignSlot(const Interval&, unsigned int size);
267   inline int32_t getStackSize() const { return stackSize; }
268
269private:
270   Function *func;
271
272   struct SpillSlot
273   {
274      Interval occup;
275      std::list<Value *> residents; // needed to recalculate occup
276      Symbol *sym;
277      int32_t offset;
278      inline uint8_t size() const { return sym->reg.size; }
279   };
280   std::list<SpillSlot> slots;
281   int32_t stackSize;
282   int32_t stackBase;
283
284   LValue *unspill(Instruction *usei, LValue *, Value *slot);
285   void spill(Instruction *defi, Value *slot, LValue *);
286};
287
288void
289RegAlloc::BuildIntervalsPass::addLiveRange(Value *val,
290                                           const BasicBlock *bb,
291                                           int end)
292{
293   Instruction *insn = val->getUniqueInsn();
294
295   if (!insn)
296      insn = bb->getFirst();
297
298   assert(bb->getFirst()->serial <= bb->getExit()->serial);
299   assert(bb->getExit()->serial + 1 >= end);
300
301   int begin = insn->serial;
302   if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial)
303      begin = bb->getEntry()->serial;
304
305   INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n",
306            val->id, begin, insn->serial, end);
307
308   if (begin != end) // empty ranges are only added as hazards for fixed regs
309      val->livei.extend(begin, end);
310}
311
312bool
313RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
314{
315   if (b->cfg.incidentCount() <= 1)
316      return false;
317
318   int n = 0;
319   for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next())
320      if (ei.getType() == Graph::Edge::TREE ||
321          ei.getType() == Graph::Edge::FORWARD)
322         ++n;
323   return (n == 2);
324}
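
// needNewElseBlock() above effectively detects a critical edge: predecessor p
// has two (tree/forward) outgoing edges while b has more than one incoming
// edge. visit() splits such edges by inserting a fresh block, so the copies
// generated for the phis end up on the path into b only and not on the other
// path leaving p.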
325
// For each operand of each PHI in b, generate a new value by inserting a MOV
// at the end of the block it comes from and replace the operand with the
// MOV's result. This eliminates liveness conflicts and, should a conflict
// arise nonetheless, still lets the value be copied into the right register.
//
// These MOVs are also crucial in making sure the live intervals of phi sources
// are extended until the end of the loop, since they are not included in the
// live-in sets.
334bool
335RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
336{
337   Instruction *phi, *mov;
338   BasicBlock *pb, *pn;
339
340   std::stack<BasicBlock *> stack;
341
342   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
343      pb = BasicBlock::get(ei.getNode());
344      assert(pb);
345      if (needNewElseBlock(bb, pb))
346         stack.push(pb);
347   }
348   while (!stack.empty()) {
349      pb = stack.top();
350      pn = new BasicBlock(func);
351      stack.pop();
352
353      pb->cfg.detach(&bb->cfg);
354      pb->cfg.attach(&pn->cfg, Graph::Edge::TREE);
355      pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD);
356
357      assert(pb->getExit()->op != OP_CALL);
358      if (pb->getExit()->asFlow()->target.bb == bb)
359         pb->getExit()->asFlow()->target.bb = pn;
360   }
361
   // insert MOVs (phi->src(j) should stem from the j-th incoming block)
363   int j = 0;
364   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
365      pb = BasicBlock::get(ei.getNode());
366      if (!pb->isTerminated())
367         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
368
369      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
370         mov = new_Instruction(func, OP_MOV, TYPE_U32);
371
372         mov->setSrc(0, phi->getSrc(j));
373         mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
374         phi->setSrc(j, mov->getDef(0));
375
376         pb->insertBefore(pb->getExit(), mov);
377      }
378      ++j;
379   }
380
381   return true;
382}
383
384bool
385RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb)
386{
   // Bind function call inputs/outputs to the same physical register
   // the callee uses, inserting moves as appropriate in case a
   // conflict arises.
390   for (Instruction *i = bb->getEntry(); i; i = i->next) {
391      FlowInstruction *cal = i->asFlow();
392      if (!cal || cal->op != OP_CALL || cal->builtin)
393         continue;
394      RegisterSet clobberSet(prog->getTarget());
395
396      // Bind input values.
397      for (int s = 0; cal->srcExists(s); ++s) {
398         LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue());
399         tmp->reg.data.id = cal->target.fn->ins[s].rep()->reg.data.id;
400
401         Instruction *mov =
402            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
403         mov->setDef(0, tmp);
404         mov->setSrc(0, cal->getSrc(s));
405         cal->setSrc(s, tmp);
406
407         bb->insertBefore(cal, mov);
408      }
409
410      // Bind output values.
411      for (int d = 0; cal->defExists(d); ++d) {
412         LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue());
413         tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id;
414
415         Instruction *mov =
416            new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
417         mov->setSrc(0, tmp);
418         mov->setDef(0, cal->getDef(d));
419         cal->setDef(d, tmp);
420
421         bb->insertAfter(cal, mov);
422         clobberSet.occupy(tmp);
423      }
424
425      // Bind clobbered values.
426      for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin();
427           it != cal->target.fn->clobbers.end();
428           ++it) {
429         if (clobberSet.occupy(*it)) {
430            Value *tmp = new_LValue(func, (*it)->asLValue());
431            tmp->reg.data.id = (*it)->reg.data.id;
432            cal->setDef(cal->defCount(), tmp);
433         }
434      }
435   }
436
437   // Update the clobber set of the function.
438   if (BasicBlock::get(func->cfgExit) == bb) {
439      func->buildDefSets();
440      for (unsigned int i = 0; i < bb->defSet.getSize(); ++i)
441         if (bb->defSet.test(i))
442            func->clobbers.push_back(func->getLValue(i));
443   }
444
445   return true;
446}
447
448// Build the set of live-in variables of bb.
449bool
450RegAlloc::buildLiveSets(BasicBlock *bb)
451{
452   Function *f = bb->getFunction();
453   BasicBlock *bn;
454   Instruction *i;
455   unsigned int s, d;
456
457   INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId());
458
459   bb->liveSet.allocate(func->allLValues.getSize(), false);
460
461   int n = 0;
462   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
463      bn = BasicBlock::get(ei.getNode());
464      if (bn == bb)
465         continue;
466      if (bn->cfg.visit(sequence))
467         if (!buildLiveSets(bn))
468            return false;
469      if (n++ || bb->liveSet.marker)
470         bb->liveSet |= bn->liveSet;
471      else
472         bb->liveSet = bn->liveSet;
473   }
474   if (!n && !bb->liveSet.marker)
475      bb->liveSet.fill(0);
476   bb->liveSet.marker = true;
477
478   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
479      INFO("BB:%i live set of out blocks:\n", bb->getId());
480      bb->liveSet.print();
481   }
482
483   // if (!bb->getEntry())
484   //   return true;
485
486   if (bb == BasicBlock::get(f->cfgExit)) {
487      for (std::deque<ValueRef>::iterator it = f->outs.begin();
488           it != f->outs.end(); ++it) {
489         assert(it->get()->asLValue());
490         bb->liveSet.set(it->get()->id);
491      }
492   }
493
494   for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) {
495      for (d = 0; i->defExists(d); ++d)
496         bb->liveSet.clr(i->getDef(d)->id);
497      for (s = 0; i->srcExists(s); ++s)
498         if (i->getSrc(s)->asLValue())
499            bb->liveSet.set(i->getSrc(s)->id);
500   }
501   for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next)
502      bb->liveSet.clr(i->getDef(0)->id);
503
504   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
505      INFO("BB:%i live set after propagation:\n", bb->getId());
506      bb->liveSet.print();
507   }
508
509   return true;
510}
511
512void
513RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb)
514{
515   BasicBlock *bbA = NULL, *bbB = NULL;
516
517   if (bb->cfg.outgoingCount()) {
518      // trickery to save a loop of OR'ing liveSets
519      // aliasing works fine with BitSet::setOr
520      for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
521         if (ei.getType() == Graph::Edge::DUMMY)
522            continue;
523         if (bbA) {
524            bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet);
525            bbA = bb;
526         } else {
527            bbA = bbB;
528         }
529         bbB = BasicBlock::get(ei.getNode());
530      }
531      bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL);
532   } else
533   if (bb->cfg.incidentCount()) {
534      bb->liveSet.fill(0);
535   }
536}
537
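// Walk the instructions of bb backwards: values still live at the exit get a
// range up to the end of the block, definitions kill liveness (fixed registers
// get a one-point hazard range), and each source not yet marked live adds a
// range from its definition (or the block entry) up to that use. Phi sources
// are handled first since they are only live on the edge coming from their
// originating block.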
538bool
539RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
540{
541   collectLiveValues(bb);
542
543   INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId());
544
   // go through the out blocks and remove from the live set any phi sources
   // that do not originate from the current block
547   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
548      BasicBlock *out = BasicBlock::get(ei.getNode());
549
550      for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) {
551         bb->liveSet.clr(i->getDef(0)->id);
552
553         for (int s = 0; i->srcExists(s); ++s) {
554            assert(i->src(s).getInsn());
555            if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ?
556               bb->liveSet.set(i->getSrc(s)->id);
557            else
558               bb->liveSet.clr(i->getSrc(s)->id);
559         }
560      }
561   }
562
563   // remaining live-outs are live until end
564   if (bb->getExit()) {
565      for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j)
566         if (bb->liveSet.test(j))
567            addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1);
568   }
569
570   for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) {
571      for (int d = 0; i->defExists(d); ++d) {
572         bb->liveSet.clr(i->getDef(d)->id);
573         if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs
574            i->getDef(d)->livei.extend(i->serial, i->serial);
575      }
576
577      for (int s = 0; i->srcExists(s); ++s) {
578         if (!i->getSrc(s)->asLValue())
579            continue;
580         if (!bb->liveSet.test(i->getSrc(s)->id)) {
581            bb->liveSet.set(i->getSrc(s)->id);
582            addLiveRange(i->getSrc(s), bb, i->serial);
583         }
584      }
585   }
586
587   if (bb == BasicBlock::get(func->cfg.getRoot())) {
588      for (std::deque<ValueDef>::iterator it = func->ins.begin();
589           it != func->ins.end(); ++it) {
590         if (it->get()->reg.data.id >= 0) // add hazard for fixed regs
591            it->get()->livei.extend(0, 1);
592      }
593   }
594
595   return true;
596}
597
598
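// Each JOIN_MASK_* bit selects one class of instructions whose values
// GCRA::doCoalesce() will try to join into a single RIG node: phi operands,
// OP_UNION/OP_MERGE/OP_SPLIT compounds, plain moves, and texture defs
// coalesced with their sources (JOIN_MASK_TEX, only used on nv50-class chips).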
599#define JOIN_MASK_PHI        (1 << 0)
600#define JOIN_MASK_UNION      (1 << 1)
601#define JOIN_MASK_MOV        (1 << 2)
602#define JOIN_MASK_TEX        (1 << 3)
603
604class GCRA
605{
606public:
607   GCRA(Function *, SpillCodeInserter&);
608   ~GCRA();
609
610   bool allocateRegisters(ArrayList& insns);
611
612   void printNodeInfo() const;
613
614private:
615   class RIG_Node : public Graph::Node
616   {
617   public:
618      RIG_Node();
619
620      void init(const RegisterSet&, LValue *);
621
622      void addInterference(RIG_Node *);
623      void addRegPreference(RIG_Node *);
624
625      inline LValue *getValue() const
626      {
627         return reinterpret_cast<LValue *>(data);
628      }
629      inline void setValue(LValue *lval) { data = lval; }
630
631      inline uint8_t getCompMask() const
632      {
633         return ((1 << colors) - 1) << (reg & 7);
634      }
635
636      static inline RIG_Node *get(const Graph::EdgeIterator& ei)
637      {
638         return static_cast<RIG_Node *>(ei.getNode());
639      }
640
641   public:
642      uint32_t degree;
643      uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable
644      uint16_t colors;
645
646      DataFile f;
647      int32_t reg;
648
649      float weight;
650
651      // list pointers for simplify() phase
652      RIG_Node *next;
653      RIG_Node *prev;
654
655      // union of the live intervals of all coalesced values (we want to retain
656      //  the separate intervals for testing interference of compound values)
657      Interval livei;
658
659      std::list<RIG_Node *> prefRegs;
660   };
661
662private:
663   inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; }
664
665   void buildRIG(ArrayList&);
666   bool coalesce(ArrayList&);
667   bool doCoalesce(ArrayList&, unsigned int mask);
668   void calculateSpillWeights();
669   void simplify();
670   bool selectRegisters();
671   void cleanup(const bool success);
672
673   void simplifyEdge(RIG_Node *, RIG_Node *);
674   void simplifyNode(RIG_Node *);
675
676   bool coalesceValues(Value *, Value *, bool force);
677   void resolveSplitsAndMerges();
678   void makeCompound(Instruction *, bool isSplit);
679
680   inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&);
681
682   inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *);
683   void checkList(std::list<RIG_Node *>&);
684
685private:
686   std::stack<uint32_t> stack;
687
688   // list headers for simplify() phase
689   RIG_Node lo[2];
690   RIG_Node hi;
691
692   Graph RIG;
693   RIG_Node *nodes;
694   unsigned int nodeCount;
695
696   Function *func;
697   Program *prog;
698
699   static uint8_t relDegree[17][17];
700
701   RegisterSet regs;
702
703   // need to fixup register id for participants of OP_MERGE/SPLIT
704   std::list<Instruction *> merges;
705   std::list<Instruction *> splits;
706
707   SpillCodeInserter& spill;
708   std::list<ValuePair> mustSpill;
709};
710
711uint8_t GCRA::relDegree[17][17];
712
713GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
714{
715   colors = 0;
716}
717
718void
719GCRA::printNodeInfo() const
720{
721   for (unsigned int i = 0; i < nodeCount; ++i) {
722      if (!nodes[i].colors)
723         continue;
724      INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X",
725           i,
726           nodes[i].f,nodes[i].reg,nodes[i].colors,
727           nodes[i].weight,
728           nodes[i].degree, nodes[i].degreeLimit);
729
730      for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next())
731         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
732      for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next())
733         INFO(" %%%i", RIG_Node::get(ei)->getValue()->id);
734      INFO("\n");
735   }
736}
737
738void
739GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval)
740{
741   setValue(lval);
742   if (lval->reg.data.id >= 0)
743      lval->noSpill = lval->fixedReg = 1;
744
745   colors = regs.units(lval->reg.file, lval->reg.size);
746   f = lval->reg.file;
747   reg = -1;
748   if (lval->reg.data.id >= 0)
749      reg = regs.idToUnits(lval);
750
751   weight = std::numeric_limits<float>::infinity();
752   degree = 0;
753   degreeLimit = regs.getFileSize(f, lval->reg.size);
754
755   livei.insert(lval->livei);
756}
757
758bool
759GCRA::coalesceValues(Value *dst, Value *src, bool force)
760{
761   LValue *rep = dst->join->asLValue();
762   LValue *val = src->join->asLValue();
763
764   if (!force && val->reg.data.id >= 0) {
765      rep = src->join->asLValue();
766      val = dst->join->asLValue();
767   }
768   RIG_Node *nRep = &nodes[rep->id];
769   RIG_Node *nVal = &nodes[val->id];
770
771   if (src->reg.file != dst->reg.file) {
772      if (!force)
773         return false;
774      WARN("forced coalescing of values in different files !\n");
775   }
776   if (!force && dst->reg.size != src->reg.size)
777      return false;
778
779   if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) {
780      if (force) {
781         if (val->reg.data.id >= 0)
782            WARN("forced coalescing of values in different fixed regs !\n");
783      } else {
784         if (val->reg.data.id >= 0)
785            return false;
786         // make sure that there is no overlap with the fixed register of rep
787         for (ArrayList::Iterator it = func->allLValues.iterator();
788              !it.end(); it.next()) {
789            Value *reg = reinterpret_cast<Value *>(it.get())->asLValue();
790            assert(reg);
791            if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei))
792               return false;
793         }
794      }
795   }
796
797   if (!force && nRep->livei.overlaps(nVal->livei))
798      return false;
799
800   INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n",
801            rep->id, rep->reg.data.id, val->id);
802
803   // set join pointer of all values joined with val
804   for (Value::DefIterator def = val->defs.begin(); def != val->defs.end();
805        ++def)
806      (*def)->get()->join = rep;
807   assert(rep->join == rep && val->join == rep);
808
809   // add val's definitions to rep and extend the live interval of its RIG node
810   rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end());
811   nRep->livei.unify(nVal->livei);
812   return true;
813}
814
815bool
816GCRA::coalesce(ArrayList& insns)
817{
818   bool ret = doCoalesce(insns, JOIN_MASK_PHI);
819   if (!ret)
820      return false;
821   switch (func->getProgram()->getTarget()->getChipset() & ~0xf) {
822   case 0x50:
823   case 0x80:
824   case 0x90:
825   case 0xa0:
826      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
827      break;
828   case 0xc0:
829   case 0xd0:
830   case 0xe0:
831      ret = doCoalesce(insns, JOIN_MASK_UNION);
832      break;
833   default:
834      break;
835   }
836   if (!ret)
837      return false;
838   return doCoalesce(insns, JOIN_MASK_MOV);
839}
840
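// Compound values: when values are merged into (or split from) a larger
// register, each member records in compMask which positions (in allocation
// units, modulo 8) it may occupy inside the compound. makeCompMask() builds
// that mask; e.g. for a 4-unit compound (compSize == 4) the member at
// base == 1 with size == 1 gets 0x22, i.e. offset 1 or 5; the pattern is
// replicated so the compound itself may start at any aligned position.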
841static inline uint8_t makeCompMask(int compSize, int base, int size)
842{
843   uint8_t m = ((1 << size) - 1) << base;
844
845   switch (compSize) {
846   case 1:
847      return 0xff;
848   case 2:
849      m |= (m << 2);
850      return (m << 4) | m;
851   case 3:
852   case 4:
853      return (m << 4) | m;
854   default:
855      assert(compSize <= 8);
856      return m;
857   }
858}
859
860static inline void copyCompound(Value *dst, Value *src)
861{
862   LValue *ldst = dst->asLValue();
863   LValue *lsrc = src->asLValue();
864
865   ldst->compound = lsrc->compound;
866   ldst->compMask = lsrc->compMask;
867}
868
869void
870GCRA::makeCompound(Instruction *insn, bool split)
871{
872   LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue();
873
874   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) {
875      INFO("makeCompound(split = %i): ", split);
876      insn->print();
877   }
878
879   const unsigned int size = getNode(rep)->colors;
880   unsigned int base = 0;
881
882   if (!rep->compound)
883      rep->compMask = 0xff;
884   rep->compound = 1;
885
886   for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) {
887      LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue();
888
889      val->compound = 1;
890      if (!val->compMask)
891         val->compMask = 0xff;
892      val->compMask &= makeCompMask(size, base, getNode(val)->colors);
893      assert(val->compMask);
894
895      INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n",
896           rep->id, rep->compMask, val->id, val->compMask);
897
898      base += getNode(val)->colors;
899   }
900   assert(base == size);
901}
902
903bool
904GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
905{
906   int c, n;
907
908   for (n = 0; n < insns.getSize(); ++n) {
909      Instruction *i;
910      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n));
911
912      switch (insn->op) {
913      case OP_PHI:
914         if (!(mask & JOIN_MASK_PHI))
915            break;
916         for (c = 0; insn->srcExists(c); ++c)
917            if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) {
918               // this is bad
919               ERROR("failed to coalesce phi operands\n");
920               return false;
921            }
922         break;
923      case OP_UNION:
924      case OP_MERGE:
925         if (!(mask & JOIN_MASK_UNION))
926            break;
927         for (c = 0; insn->srcExists(c); ++c)
928            coalesceValues(insn->getDef(0), insn->getSrc(c), true);
929         if (insn->op == OP_MERGE) {
930            merges.push_back(insn);
931            if (insn->srcExists(1))
932               makeCompound(insn, false);
933         }
934         break;
935      case OP_SPLIT:
936         if (!(mask & JOIN_MASK_UNION))
937            break;
938         splits.push_back(insn);
939         for (c = 0; insn->defExists(c); ++c)
940            coalesceValues(insn->getSrc(0), insn->getDef(c), true);
941         makeCompound(insn, true);
942         break;
943      case OP_MOV:
944         if (!(mask & JOIN_MASK_MOV))
945            break;
946         i = NULL;
947         if (!insn->getDef(0)->uses.empty())
948            i = insn->getDef(0)->uses.front()->getInsn();
         // if this is a constraint-move there will only be a single use
950         if (i && i->op == OP_MERGE) // do we really still need this ?
951            break;
952         i = insn->getSrc(0)->getUniqueInsn();
953         if (i && !i->constrainedDefs()) {
954            if (coalesceValues(insn->getDef(0), insn->getSrc(0), false))
955               copyCompound(insn->getSrc(0), insn->getDef(0));
956         }
957         break;
958      case OP_TEX:
959      case OP_TXB:
960      case OP_TXL:
961      case OP_TXF:
962      case OP_TXQ:
963      case OP_TXD:
964      case OP_TXG:
965      case OP_TEXCSAA:
966         if (!(mask & JOIN_MASK_TEX))
967            break;
968         for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
969            coalesceValues(insn->getDef(c), insn->getSrc(c), true);
970         break;
971      default:
972         break;
973      }
974   }
975   return true;
976}
977
978void
979GCRA::RIG_Node::addInterference(RIG_Node *node)
980{
981   this->degree += relDegree[node->colors][colors];
982   node->degree += relDegree[colors][node->colors];
983
984   this->attach(node, Graph::Edge::CROSS);
985}
986
987void
988GCRA::RIG_Node::addRegPreference(RIG_Node *node)
989{
990   prefRegs.push_back(node);
991}
992
993GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
994   func(fn),
995   regs(fn->getProgram()->getTarget()),
996   spill(spill)
997{
998   prog = func->getProgram();
999
1000   // initialize relative degrees array - i takes away from j
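   // relDegree[i][j] = j * ceil(i / j): how many colours a neighbour needing
   // i consecutive units takes away from a node needing j of them (e.g. a
   // 2-unit neighbour costs a 1-unit node two slots, relDegree[2][1] == 2,
   // and a 1-unit neighbour still blocks a whole 2-unit slot,
   // relDegree[1][2] == 2).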
1001   for (int i = 1; i <= 16; ++i)
1002      for (int j = 1; j <= 16; ++j)
1003         relDegree[i][j] = j * ((i + j - 1) / j);
1004}
1005
1006GCRA::~GCRA()
1007{
1008   if (nodes)
1009      delete[] nodes;
1010}
1011
1012void
1013GCRA::checkList(std::list<RIG_Node *>& lst)
1014{
1015   GCRA::RIG_Node *prev = NULL;
1016
1017   for (std::list<RIG_Node *>::iterator it = lst.begin();
1018        it != lst.end();
1019        ++it) {
1020      assert((*it)->getValue()->join == (*it)->getValue());
1021      if (prev)
1022         assert(prev->livei.begin() <= (*it)->livei.begin());
1023      prev = *it;
1024   }
1025}
1026
1027void
1028GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node)
1029{
1030   if (node->livei.isEmpty())
1031      return;
1032   // only the intervals of joined values don't necessarily arrive in order
1033   std::list<RIG_Node *>::iterator prev, it;
1034   for (it = list.end(); it != list.begin(); it = prev) {
1035      prev = it;
1036      --prev;
1037      if ((*prev)->livei.begin() <= node->livei.begin())
1038         break;
1039   }
1040   list.insert(it, node);
1041}
1042
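// Build the register interference graph in a single linear sweep: values are
// visited in order of increasing live-interval start, an active list holds
// the intervals currently open, expired intervals are dropped, and an
// interference edge is added whenever two values of the same file have
// overlapping intervals.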
1043void
1044GCRA::buildRIG(ArrayList& insns)
1045{
1046   std::list<RIG_Node *> values, active;
1047
1048   for (std::deque<ValueDef>::iterator it = func->ins.begin();
1049        it != func->ins.end(); ++it)
1050      insertOrderedTail(values, getNode(it->get()->asLValue()));
1051
1052   for (int i = 0; i < insns.getSize(); ++i) {
1053      Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i));
1054      for (int d = 0; insn->defExists(d); ++d)
1055         if (insn->getDef(d)->rep() == insn->getDef(d))
1056            insertOrderedTail(values, getNode(insn->getDef(d)->asLValue()));
1057   }
1058   checkList(values);
1059
1060   while (!values.empty()) {
1061      RIG_Node *cur = values.front();
1062
1063      for (std::list<RIG_Node *>::iterator it = active.begin();
1064           it != active.end();
1065           ++it) {
1066         RIG_Node *node = *it;
1067
1068         if (node->livei.end() <= cur->livei.begin()) {
1069            it = active.erase(it);
1070            --it;
1071         } else
1072         if (node->f == cur->f && node->livei.overlaps(cur->livei)) {
1073            cur->addInterference(node);
1074         }
1075      }
1076      values.pop_front();
1077      active.push_back(cur);
1078   }
1079}
1080
1081void
1082GCRA::calculateSpillWeights()
1083{
1084   for (unsigned int i = 0; i < nodeCount; ++i) {
1085      RIG_Node *const n = &nodes[i];
1086      if (!nodes[i].colors || nodes[i].livei.isEmpty())
1087         continue;
1088      if (nodes[i].reg >= 0) {
1089         // update max reg
1090         regs.occupy(n->f, n->reg, n->colors);
1091         continue;
1092      }
1093      LValue *val = nodes[i].getValue();
1094
1095      if (!val->noSpill) {
1096         int rc = 0;
1097         for (Value::DefIterator it = val->defs.begin();
1098              it != val->defs.end();
1099              ++it)
1100            rc += (*it)->get()->refCount();
1101
1102         nodes[i].weight =
1103            (float)rc * (float)rc / (float)nodes[i].livei.extent();
1104      }
1105
1106      if (nodes[i].degree < nodes[i].degreeLimit) {
1107         int l = 0;
1108         if (val->reg.size > 4)
1109            l = 1;
1110         DLLIST_ADDHEAD(&lo[l], &nodes[i]);
1111      } else {
1112         DLLIST_ADDHEAD(&hi, &nodes[i]);
1113      }
1114   }
1115   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1116      printNodeInfo();
1117}
1118
1119void
1120GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
1121{
1122   bool move = b->degree >= b->degreeLimit;
1123
1124   INFO_DBG(prog->dbgFlags, REG_ALLOC,
1125            "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
1126            a->getValue()->id, a->degree, a->degreeLimit,
1127            b->getValue()->id, b->degree, b->degreeLimit);
1128
1129   b->degree -= relDegree[a->colors][b->colors];
1130
1131   move = move && b->degree < b->degreeLimit;
1132   if (move && !DLLIST_EMPTY(b)) {
1133      int l = (b->getValue()->reg.size > 4) ? 1 : 0;
1134      DLLIST_DEL(b);
1135      DLLIST_ADDTAIL(&lo[l], b);
1136   }
1137}
1138
1139void
1140GCRA::simplifyNode(RIG_Node *node)
1141{
1142   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
1143      simplifyEdge(node, RIG_Node::get(ei));
1144
1145   for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
1146      simplifyEdge(node, RIG_Node::get(ei));
1147
1148   DLLIST_DEL(node);
1149   stack.push(node->getValue()->id);
1150
1151   INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
1152            node->getValue()->id,
1153            (node->degree < node->degreeLimit) ? "" : "(spill)");
1154}
1155
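// Chaitin/Briggs-style simplification: repeatedly remove trivially colourable
// nodes (degree < degreeLimit) and push them on the stack, draining the small
// values (lo[0]) before the wide ones (lo[1]). When only high-degree nodes
// remain, optimistically push the one with the lowest weight/degree score; it
// becomes a spill candidate if selectRegisters() cannot colour it later.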
1156void
1157GCRA::simplify()
1158{
1159   for (;;) {
1160      if (!DLLIST_EMPTY(&lo[0])) {
1161         do {
1162            simplifyNode(lo[0].next);
1163         } while (!DLLIST_EMPTY(&lo[0]));
1164      } else
1165      if (!DLLIST_EMPTY(&lo[1])) {
1166         simplifyNode(lo[1].next);
1167      } else
1168      if (!DLLIST_EMPTY(&hi)) {
1169         RIG_Node *best = hi.next;
1170         float bestScore = best->weight / (float)best->degree;
1171         // spill candidate
1172         for (RIG_Node *it = best->next; it != &hi; it = it->next) {
1173            float score = it->weight / (float)it->degree;
1174            if (score < bestScore) {
1175               best = it;
1176               bestScore = score;
1177            }
1178         }
1179         if (isinf(bestScore)) {
1180            ERROR("no viable spill candidates left\n");
1181            break;
1182         }
1183         simplifyNode(best);
1184      } else {
1185         break;
1186      }
1187   }
1188}
1189
1190void
1191GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
1192{
1193   const RIG_Node *intf = RIG_Node::get(ei);
1194
1195   if (intf->reg < 0)
1196      return;
1197   const LValue *vA = node->getValue();
1198   const LValue *vB = intf->getValue();
1199
1200   const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);
1201
1202   if (vA->compound | vB->compound) {
1203      // NOTE: this only works for >aligned< register tuples !
1204      for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) {
1205      for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) {
1206         const LValue *vD = (*D)->get()->asLValue();
1207         const LValue *vd = (*d)->get()->asLValue();
1208
1209         if (!vD->livei.overlaps(vd->livei)) {
1210            INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
1211                     vD->id, vd->id);
1212            continue;
1213         }
1214
1215         uint8_t mask = vD->compound ? vD->compMask : ~0;
1216         if (vd->compound) {
1217            assert(vB->compound);
1218            mask &= vd->compMask & vB->compMask;
1219         } else {
1220            mask &= intfMask;
1221         }
1222
1223         INFO_DBG(prog->dbgFlags, REG_ALLOC,
1224                  "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
1225                  vD->id,
1226                  vD->compound ? vD->compMask : 0xff,
1227                  vd->id,
1228                  vd->compound ? vd->compMask : intfMask,
1229                  vB->compMask, intf->reg & ~7, mask);
1230         if (mask)
1231            regs.occupyMask(node->f, intf->reg & ~7, mask);
1232      }
1233      }
1234   } else {
1235      INFO_DBG(prog->dbgFlags, REG_ALLOC,
1236               "(%%%i) X (%%%i): $r%i + %u\n",
1237               vA->id, vB->id, intf->reg, intf->colors);
1238      regs.occupy(node->f, intf->reg, intf->colors);
1239   }
1240}
1241
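// Colour the graph: pop nodes from the simplification stack, mark the
// registers of already-coloured neighbours as occupied (honouring compound
// masks), then try the node's preferred registers and finally any free range.
// Nodes that cannot be coloured are queued in mustSpill, together with a
// stack slot for GPRs.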
1242bool
1243GCRA::selectRegisters()
1244{
1245   INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");
1246
1247   while (!stack.empty()) {
1248      RIG_Node *node = &nodes[stack.top()];
1249      stack.pop();
1250
1251      regs.reset(node->f);
1252
1253      INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
1254               node->getValue()->id, node->colors);
1255
1256      for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
1257         checkInterference(node, ei);
1258      for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
1259         checkInterference(node, ei);
1260
1261      if (!node->prefRegs.empty()) {
1262         for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
1263              it != node->prefRegs.end();
1264              ++it) {
1265            if ((*it)->reg >= 0 &&
1266                regs.occupy(node->f, (*it)->reg, node->colors)) {
1267               node->reg = (*it)->reg;
1268               break;
1269            }
1270         }
1271      }
1272      if (node->reg >= 0)
1273         continue;
1274      LValue *lval = node->getValue();
1275      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1276         regs.print();
1277      bool ret = regs.assign(node->reg, node->f, node->colors);
1278      if (ret) {
1279         INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
1280         lval->compMask = node->getCompMask();
1281      } else {
1282         INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
1283                  lval->id, lval->reg.size);
1284         Symbol *slot = NULL;
1285         if (lval->reg.file == FILE_GPR)
1286            slot = spill.assignSlot(node->livei, lval->reg.size);
1287         mustSpill.push_back(ValuePair(lval, slot));
1288      }
1289   }
1290   if (!mustSpill.empty())
1291      return false;
1292   for (unsigned int i = 0; i < nodeCount; ++i) {
1293      LValue *lval = nodes[i].getValue();
1294      if (nodes[i].reg >= 0 && nodes[i].colors > 0)
1295         lval->reg.data.id =
1296            regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
1297   }
1298   return true;
1299}
1300
1301bool
1302GCRA::allocateRegisters(ArrayList& insns)
1303{
1304   bool ret;
1305
1306   INFO_DBG(prog->dbgFlags, REG_ALLOC,
1307            "allocateRegisters to %u instructions\n", insns.getSize());
1308
1309   nodeCount = func->allLValues.getSize();
1310   nodes = new RIG_Node[nodeCount];
1311   if (!nodes)
1312      return false;
1313   for (unsigned int i = 0; i < nodeCount; ++i) {
1314      LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i));
1315      if (lval) {
1316         nodes[i].init(regs, lval);
1317         RIG.insert(&nodes[i]);
1318      }
1319   }
1320
1321   // coalesce first, we use only 1 RIG node for a group of joined values
1322   ret = coalesce(insns);
1323   if (!ret)
1324      goto out;
1325
1326   if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1327      func->printLiveIntervals();
1328
1329   buildRIG(insns);
1330   calculateSpillWeights();
1331   simplify();
1332
1333   ret = selectRegisters();
1334   if (!ret) {
1335      INFO_DBG(prog->dbgFlags, REG_ALLOC,
1336               "selectRegisters failed, inserting spill code ...\n");
1337      regs.reset(FILE_GPR, true);
1338      spill.run(mustSpill);
1339      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1340         func->print();
1341   } else {
1342      prog->maxGPR = regs.getMaxAssigned(FILE_GPR);
1343   }
1344
1345out:
1346   cleanup(ret);
1347   return ret;
1348}
1349
1350void
1351GCRA::cleanup(const bool success)
1352{
1353   mustSpill.clear();
1354
1355   for (ArrayList::Iterator it = func->allLValues.iterator();
1356        !it.end(); it.next()) {
1357      LValue *lval =  reinterpret_cast<LValue *>(it.get());
1358
1359      lval->livei.clear();
1360
1361      lval->compound = 0;
1362      lval->compMask = 0;
1363
1364      if (lval->join == lval)
1365         continue;
1366
1367      if (success) {
1368         lval->reg.data.id = lval->join->reg.data.id;
1369      } else {
1370         for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
1371              ++d)
1372            lval->join->defs.remove(*d);
1373         lval->join = lval;
1374      }
1375   }
1376
1377   if (success)
1378      resolveSplitsAndMerges();
1379   splits.clear(); // avoid duplicate entries on next coalesce pass
1380   merges.clear();
1381
1382   delete[] nodes;
1383   nodes = NULL;
1384}
1385
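// Find (or create) a spill slot for a value with the given live interval:
// walk the existing slots and reuse one whose recorded occupancy does not
// overlap livei, otherwise grow the stack. The returned symbol lives in
// local memory (FILE_MEMORY_LOCAL), offset by the function's tlsBase when
// there is no stack pointer.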
1386Symbol *
1387SpillCodeInserter::assignSlot(const Interval &livei, unsigned int size)
1388{
1389   SpillSlot slot;
1390   int32_t offsetBase = stackSize;
1391   int32_t offset;
1392   std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();
1393
1394   if (offsetBase % size)
1395      offsetBase += size - (offsetBase % size);
1396
1397   slot.sym = NULL;
1398
1399   for (offset = offsetBase; offset < stackSize; offset += size) {
1400      while (it != slots.end() && it->offset < offset)
1401         ++it;
1402      if (it == slots.end()) // no slots left
1403         break;
1404      std::list<SpillSlot>::iterator bgn = it;
1405
1406      while (it != slots.end() && it->offset < (offset + size)) {
1407         it->occup.print();
1408         if (it->occup.overlaps(livei))
1409            break;
1410         ++it;
1411      }
1412      if (it == slots.end() || it->offset >= (offset + size)) {
1413         // fits
1414         for (; bgn != slots.end() && bgn->offset < (offset + size); ++bgn) {
1415            bgn->occup.insert(livei);
1416            if (bgn->size() == size)
1417               slot.sym = bgn->sym;
1418         }
1419         break;
1420      }
1421   }
1422   if (!slot.sym) {
1423      stackSize = offset + size;
1424      slot.offset = offset;
1425      slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
1426      if (!func->stackPtr)
1427         offset += func->tlsBase;
1428      slot.sym->setAddress(NULL, offset);
1429      slot.sym->reg.size = size;
1430      slots.insert(pos, slot)->occup.insert(livei);
1431   }
1432   return slot.sym;
1433}
1434
1435void
1436SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
1437{
1438   const DataType ty = typeOfSize(slot->reg.size);
1439
1440   Instruction *st;
1441   if (slot->reg.file == FILE_MEMORY_LOCAL) {
1442      st = new_Instruction(func, OP_STORE, ty);
1443      st->setSrc(0, slot);
1444      st->setSrc(1, lval);
1445      lval->noSpill = 1;
1446   } else {
1447      st = new_Instruction(func, OP_CVT, ty);
1448      st->setDef(0, slot);
1449      st->setSrc(0, lval);
1450   }
1451   defi->bb->insertAfter(defi, st);
1452}
1453
1454LValue *
1455SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
1456{
1457   const DataType ty = typeOfSize(slot->reg.size);
1458
1459   lval = cloneShallow(func, lval);
1460
1461   Instruction *ld;
1462   if (slot->reg.file == FILE_MEMORY_LOCAL) {
1463      lval->noSpill = 1;
1464      ld = new_Instruction(func, OP_LOAD, ty);
1465   } else {
1466      ld = new_Instruction(func, OP_CVT, ty);
1467   }
1468   ld->setDef(0, lval);
1469   ld->setSrc(0, slot);
1470
1471   usei->bb->insertBefore(usei, ld);
1472   return lval;
1473}
1474
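// Insert the actual spill code for every value GCRA failed to colour: each
// use gets a load (or a copy from the substitute register) right before the
// using instruction, each definition gets a store right after it. Phis are
// special-cased: a phi operand either reads the substitute register directly
// or is dropped, and a phi def is either redirected to it or deleted when
// spilling to local memory.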
1475bool
1476SpillCodeInserter::run(const std::list<ValuePair>& lst)
1477{
1478   for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
1479        ++it) {
1480      LValue *lval = it->first->asLValue();
1481      Symbol *mem = it->second ? it->second->asSym() : NULL;
1482
1483      for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
1484           ++d) {
1485         Value *slot = mem ?
1486            static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
1487         Value *tmp = NULL;
1488         Instruction *last = NULL;
1489
1490         LValue *dval = (*d)->get()->asLValue();
1491         Instruction *defi = (*d)->getInsn();
1492
1493         // handle uses first or they'll contain the spill stores
1494         while (!dval->uses.empty()) {
1495            ValueRef *u = dval->uses.front();
1496            Instruction *usei = u->getInsn();
1497            assert(usei);
1498            if (usei->op == OP_PHI) {
1499               tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
1500               last = NULL;
1501            } else
1502            if (!last || usei != last->next) { // TODO: sort uses
1503               tmp = unspill(usei, dval, slot);
1504               last = usei;
1505            }
1506            u->set(tmp);
1507         }
1508
1509         assert(defi);
1510         if (defi->op == OP_PHI) {
1511            d = lval->defs.erase(d);
1512            --d;
1513            if (slot->reg.file == FILE_MEMORY_LOCAL)
1514               delete_Instruction(func->getProgram(), defi);
1515            else
1516               defi->setDef(0, slot);
1517         } else {
1518            spill(defi, slot, dval);
1519         }
1520      }
1521
1522   }
1523
1524   // TODO: We're not trying to reuse old slots in a potential next iteration.
1525   //  We have to update the slots' livei intervals to be able to do that.
1526   stackBase = stackSize;
1527   slots.clear();
1528   return true;
1529}
1530
1531bool
1532RegAlloc::exec()
1533{
1534   for (IteratorRef it = prog->calls.iteratorDFS(false);
1535        !it->end(); it->next()) {
1536      func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));
1537
1538      func->tlsBase = prog->tlsSize;
1539      if (!execFunc())
1540         return false;
1541      prog->tlsSize += func->tlsSize;
1542   }
1543   return true;
1544}
1545
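// Allocate registers for a single function. The constraint, phi and argument
// passes run once; liveness, live intervals and the interference graph are
// rebuilt on every retry, because spilling introduces new values and
// instructions. At most three rounds are attempted (see the TODO about spill
// slot usage ranges below).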
1546bool
1547RegAlloc::execFunc()
1548{
1549   InsertConstraintsPass insertConstr;
1550   PhiMovesPass insertPhiMoves;
1551   ArgumentMovesPass insertArgMoves;
1552   BuildIntervalsPass buildIntervals;
1553   SpillCodeInserter insertSpills(func);
1554
1555   GCRA gcra(func, insertSpills);
1556
1557   unsigned int i, retries;
1558   bool ret;
1559
1560   ret = insertConstr.exec(func);
1561   if (!ret)
1562      goto out;
1563
1564   ret = insertPhiMoves.run(func);
1565   if (!ret)
1566      goto out;
1567
1568   ret = insertArgMoves.run(func);
1569   if (!ret)
1570      goto out;
1571
1572   // TODO: need to fix up spill slot usage ranges to support > 1 retry
1573   for (retries = 0; retries < 3; ++retries) {
1574      if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
1575         INFO("Retry: %i\n", retries);
1576      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
1577         func->print();
1578
1579      // spilling to registers may add live ranges, need to rebuild everything
1580      ret = true;
1581      for (sequence = func->cfg.nextSequence(), i = 0;
1582           ret && i <= func->loopNestingBound;
1583           sequence = func->cfg.nextSequence(), ++i)
1584         ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
1585      if (!ret)
1586         break;
1587      func->orderInstructions(this->insns);
1588
1589      ret = buildIntervals.run(func);
1590      if (!ret)
1591         break;
1592      ret = gcra.allocateRegisters(insns);
1593      if (ret)
1594         break; // success
1595   }
1596   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);
1597
1598   func->tlsSize = insertSpills.getStackSize();
1599out:
1600   return ret;
1601}
1602
// TODO: check if modifying Value::join here breaks anything
1604void
1605GCRA::resolveSplitsAndMerges()
1606{
1607   for (std::list<Instruction *>::iterator it = splits.begin();
1608        it != splits.end();
1609        ++it) {
1610      Instruction *split = *it;
1611      unsigned int reg = regs.idToBytes(split->getSrc(0));
1612      for (int d = 0; split->defExists(d); ++d) {
1613         Value *v = split->getDef(d);
1614         v->reg.data.id = regs.bytesToId(v, reg);
1615         v->join = v;
1616         reg += v->reg.size;
1617      }
1618   }
1619   splits.clear();
1620
1621   for (std::list<Instruction *>::iterator it = merges.begin();
1622        it != merges.end();
1623        ++it) {
1624      Instruction *merge = *it;
1625      unsigned int reg = regs.idToBytes(merge->getDef(0));
1626      for (int s = 0; merge->srcExists(s); ++s) {
1627         Value *v = merge->getSrc(s);
1628         v->reg.data.id = regs.bytesToId(v, reg);
1629         v->join = v;
1630         reg += v->reg.size;
1631      }
1632   }
1633   merges.clear();
1634}
1635
1636bool Program::registerAllocation()
1637{
1638   RegAlloc ra(this);
1639   return ra.exec();
1640}
1641
1642bool
1643RegAlloc::InsertConstraintsPass::exec(Function *ir)
1644{
1645   constrList.clear();
1646
1647   bool ret = run(ir, true, true);
1648   if (ret)
1649      ret = insertConstraintMoves();
1650   return ret;
1651}
1652
1653// TODO: make part of texture insn
1654void
1655RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
1656{
1657   Value *def[4];
1658   int c, k, d;
1659   uint8_t mask = 0;
1660
1661   for (d = 0, k = 0, c = 0; c < 4; ++c) {
1662      if (!(tex->tex.mask & (1 << c)))
1663         continue;
1664      if (tex->getDef(k)->refCount()) {
1665         mask |= 1 << c;
1666         def[d++] = tex->getDef(k);
1667      }
1668      ++k;
1669   }
1670   tex->tex.mask = mask;
1671
1672   for (c = 0; c < d; ++c)
1673      tex->setDef(c, def[c]);
1674   for (; c < 4; ++c)
1675      tex->setDef(c, NULL);
1676}
1677
1678bool
1679RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
1680{
1681   Value *v = cst->getSrc(s);
1682
1683   // current register allocation can't handle it if a value participates in
1684   // multiple constraints
1685   for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
1686      if (cst != (*it)->getInsn())
1687         return true;
1688   }
1689
1690   // can start at s + 1 because detectConflict is called on all sources
1691   for (int c = s + 1; cst->srcExists(c); ++c)
1692      if (v == cst->getSrc(c))
1693         return true;
1694
1695   Instruction *defi = v->getInsn();
1696
1697   return (!defi || defi->constrainedDefs());
1698}
1699
1700void
1701RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
1702{
1703   Instruction *cst;
1704   int d;
1705
1706   // first, look for an existing identical constraint op
1707   for (std::list<Instruction *>::iterator it = constrList.begin();
1708        it != constrList.end();
1709        ++it) {
1710      cst = (*it);
1711      if (!i->bb->dominatedBy(cst->bb))
1712         break;
1713      for (d = 0; d < n; ++d)
1714         if (cst->getSrc(d) != i->getSrc(d + s))
1715            break;
1716      if (d >= n) {
1717         for (d = 0; d < n; ++d, ++s)
1718            i->setSrc(s, cst->getDef(d));
1719         return;
1720      }
1721   }
1722   cst = new_Instruction(func, OP_CONSTRAINT, i->dType);
1723
1724   for (d = 0; d < n; ++s, ++d) {
1725      cst->setDef(d, new_LValue(func, FILE_GPR));
1726      cst->setSrc(d, i->getSrc(s));
1727      i->setSrc(s, cst->getDef(d));
1728   }
1729   i->bb->insertBefore(i, cst);
1730
1731   constrList.push_back(cst);
1732}
1733
// Add a dummy use of the pointer source of >= 8 byte loads after the load
// to prevent it from being assigned a register overlapping the load's
// destination, which would produce random corruptions.
1737void
1738RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
1739{
1740   Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
1741   hzd->setSrc(0, src->get());
1742   i->bb->insertAfter(i, hzd);
1743
1744}
1745
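// condenseDefs()/condenseSrcs() funnel a run of consecutive GPR defs or
// sources through a single wide value connected by OP_SPLIT/OP_MERGE, so the
// allocator only has to give that one value a contiguous register range,
// e.g.: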
1746// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
1747void
1748RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
1749{
1750   uint8_t size = 0;
1751   int n;
1752   for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n)
1753      size += insn->getDef(n)->reg.size;
1754   if (n < 2)
1755      return;
1756   LValue *lval = new_LValue(func, FILE_GPR);
1757   lval->reg.size = size;
1758
1759   Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size));
1760   split->setSrc(0, lval);
1761   for (int d = 0; d < n; ++d) {
1762      split->setDef(d, insn->getDef(d));
1763      insn->setDef(d, NULL);
1764   }
1765   insn->setDef(0, lval);
1766
1767   for (int k = 1, d = n; insn->defExists(d); ++d, ++k) {
1768      insn->setDef(k, insn->getDef(d));
1769      insn->setDef(d, NULL);
1770   }
1771   // carry over predicate if any (mainly for OP_UNION uses)
1772   split->setPredicate(insn->cc, insn->getPredicate());
1773
1774   insn->bb->insertAfter(insn, split);
1775   constrList.push_back(split);
}

void
1778RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
1779                                              const int a, const int b)
1780{
1781   uint8_t size = 0;
1782   if (a >= b)
1783      return;
1784   for (int s = a; s <= b; ++s)
1785      size += insn->getSrc(s)->reg.size;
1786   if (!size)
1787      return;
1788   LValue *lval = new_LValue(func, FILE_GPR);
1789   lval->reg.size = size;
1790
1791   Value *save[3];
1792   insn->takeExtraSources(0, save);
1793
1794   Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size));
1795   merge->setDef(0, lval);
1796   for (int s = a, i = 0; s <= b; ++s, ++i) {
1797      merge->setSrc(i, insn->getSrc(s));
1798      insn->setSrc(s, NULL);
1799   }
1800   insn->setSrc(a, lval);
1801
1802   for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) {
1803      insn->setSrc(k, insn->getSrc(s));
1804      insn->setSrc(s, NULL);
1805   }
1806   insn->bb->insertBefore(insn, merge);
1807
1808   insn->putExtraSources(0, save);
1809
1810   constrList.push_back(merge);
1811}
1812
1813void
1814RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
1815{
1816   textureMask(tex);
1817   condenseDefs(tex);
1818
1819   int n = tex->srcCount(0xff, true);
1820   if (n > 4) {
1821      condenseSrcs(tex, 0, 3);
1822      if (n > 5) // NOTE: first call modified positions already
1823         condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
1824   } else
1825   if (n > 1) {
1826      condenseSrcs(tex, 0, n - 1);
1827   }
1828}
1829
1830void
1831RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
1832{
1833   int n, s;
1834
1835   textureMask(tex);
1836
1837   if (tex->op == OP_TXQ) {
1838      s = tex->srcCount(0xff);
1839      n = 0;
1840   } else {
1841      s = tex->tex.target.getArgCount();
1842      if (!tex->tex.target.isArray() &&
1843          (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
1844         ++s;
1845      if (tex->op == OP_TXD && tex->tex.useOffsets)
1846         ++s;
1847      n = tex->srcCount(0xff) - s;
1848      assert(n <= 4);
1849   }
1850
1851   if (s > 1)
1852      condenseSrcs(tex, 0, s - 1);
1853   if (n > 1) // NOTE: first call modified positions already
1854      condenseSrcs(tex, 1, n);
1855
1856   condenseDefs(tex);
1857}
1858
1859void
1860RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
1861{
1862   Value *pred = tex->getPredicate();
1863   if (pred)
1864      tex->setPredicate(tex->cc, NULL);
1865
1866   textureMask(tex);
1867
1868   assert(tex->defExists(0) && tex->srcExists(0));
1869   // make src and def count match
1870   int c;
1871   for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
1872      if (!tex->srcExists(c))
1873         tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
1874      if (!tex->defExists(c))
1875         tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
1876   }
1877   if (pred)
1878      tex->setPredicate(tex->cc, pred);
1879   condenseDefs(tex);
1880   condenseSrcs(tex, 0, c - 1);
1881}
1882
1883// Insert constraint markers for instructions whose multiple sources must be
1884// located in consecutive registers.
1885bool
1886RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
1887{
1888   TexInstruction *tex;
1889   Instruction *next;
1890   int s, size;
1891
1892   targ = bb->getProgram()->getTarget();
1893
1894   for (Instruction *i = bb->getEntry(); i; i = next) {
1895      next = i->next;
1896
1897      if ((tex = i->asTex())) {
1898         switch (targ->getChipset() & ~0xf) {
1899         case 0x50:
1900         case 0x80:
1901         case 0x90:
1902         case 0xa0:
1903            texConstraintNV50(tex);
1904            break;
1905         case 0xc0:
1906         case 0xd0:
1907            texConstraintNVC0(tex);
1908            break;
1909         case 0xe0:
1910            texConstraintNVE0(tex);
1911            break;
1912         default:
1913            break;
1914         }
1915      } else
1916      if (i->op == OP_EXPORT || i->op == OP_STORE) {
1917         for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
1918            assert(i->srcExists(s));
1919            size -= i->getSrc(s)->reg.size;
1920         }
1921         condenseSrcs(i, 1, s - 1);
1922      } else
1923      if (i->op == OP_LOAD || i->op == OP_VFETCH) {
1924         condenseDefs(i);
1925         if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
1926            addHazard(i, i->src(0).getIndirect(0));
1927      } else
1928      if (i->op == OP_UNION) {
1929         constrList.push_back(i);
1930      }
1931   }
1932   return true;
1933}
1934
1935// Insert extra moves so that, if multiple register constraints on a value are
1936// in conflict, these conflicts can be resolved.
1937bool
1938RegAlloc::InsertConstraintsPass::insertConstraintMoves()
1939{
1940   for (std::list<Instruction *>::iterator it = constrList.begin();
1941        it != constrList.end();
1942        ++it) {
1943      Instruction *cst = *it;
1944      Instruction *mov;
1945
1946      if (cst->op == OP_SPLIT && 0) {
1947         // spilling splits is annoying, just make sure they're separate
1948         for (int d = 0; cst->defExists(d); ++d) {
1949            if (!cst->getDef(d)->refCount())
1950               continue;
1951            LValue *lval = new_LValue(func, cst->def(d).getFile());
1952            const uint8_t size = cst->def(d).getSize();
1953            lval->reg.size = size;
1954
1955            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
1956            mov->setSrc(0, lval);
1957            mov->setDef(0, cst->getDef(d));
1958            cst->setDef(d, mov->getSrc(0));
1959            cst->bb->insertAfter(cst, mov);
1960
1961            cst->getSrc(0)->asLValue()->noSpill = 1;
1962            mov->getSrc(0)->asLValue()->noSpill = 1;
1963         }
1964      } else
1965      if (cst->op == OP_MERGE || cst->op == OP_UNION) {
1966         for (int s = 0; cst->srcExists(s); ++s) {
1967            const uint8_t size = cst->src(s).getSize();
1968
1969            if (!cst->getSrc(s)->defs.size()) {
1970               mov = new_Instruction(func, OP_NOP, typeOfSize(size));
1971               mov->setDef(0, cst->getSrc(s));
1972               cst->bb->insertBefore(cst, mov);
1973               continue;
1974            }
1975            assert(cst->getSrc(s)->defs.size() == 1); // still SSA
1976
1977            Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
            // catch some cases where we don't really need MOVs
1979            if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
1980               continue;
1981
1982            LValue *lval = new_LValue(func, cst->src(s).getFile());
1983            lval->reg.size = size;
1984
1985            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
1986            mov->setDef(0, lval);
1987            mov->setSrc(0, cst->getSrc(s));
1988            cst->setSrc(s, mov->getDef(0));
1989            cst->bb->insertBefore(cst, mov);
1990
1991            cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help
1992
1993            if (cst->op == OP_UNION)
1994               mov->setPredicate(defi->cc, defi->getPredicate());
1995         }
1996      }
1997   }
1998
1999   return true;
2000}
2001
2002} // namespace nv50_ir
2003