1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 
21 */ 22 23#include "nv50_ir.h" 24#include "nv50_ir_target.h" 25 26#include <stack> 27#include <limits> 28 29namespace nv50_ir { 30 31#define MAX_REGISTER_FILE_SIZE 256 32 33class RegisterSet 34{ 35public: 36 RegisterSet(const Target *); 37 38 void init(const Target *); 39 void reset(DataFile, bool resetMax = false); 40 41 void periodicMask(DataFile f, uint32_t lock, uint32_t unlock); 42 void intersect(DataFile f, const RegisterSet *); 43 44 bool assign(int32_t& reg, DataFile f, unsigned int size); 45 void release(DataFile f, int32_t reg, unsigned int size); 46 bool occupy(DataFile f, int32_t reg, unsigned int size); 47 bool occupy(const Value *); 48 void occupyMask(DataFile f, int32_t reg, uint8_t mask); 49 50 inline int getMaxAssigned(DataFile f) const { return fill[f]; } 51 52 inline unsigned int getFileSize(DataFile f, uint8_t regSize) const 53 { 54 if (restrictedGPR16Range && f == FILE_GPR && regSize == 2) 55 return (last[f] + 1) / 2; 56 return last[f] + 1; 57 } 58 59 inline unsigned int units(DataFile f, unsigned int size) const 60 { 61 return size >> unit[f]; 62 } 63 // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary) 64 inline unsigned int idToBytes(Value *v) const 65 { 66 return v->reg.data.id * MIN2(v->reg.size, 4); 67 } 68 inline unsigned int idToUnits(Value *v) const 69 { 70 return units(v->reg.file, idToBytes(v)); 71 } 72 inline int bytesToId(Value *v, unsigned int bytes) const 73 { 74 if (v->reg.size < 4) 75 return units(v->reg.file, bytes); 76 return bytes / 4; 77 } 78 inline int unitsToId(DataFile f, int u, uint8_t size) const 79 { 80 if (u < 0) 81 return -1; 82 return (size < 4) ? 
u : ((u << unit[f]) / 4); 83 } 84 85 void print() const; 86 87private: 88 BitSet bits[LAST_REGISTER_FILE + 1]; 89 90 int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity 91 92 int last[LAST_REGISTER_FILE + 1]; 93 int fill[LAST_REGISTER_FILE + 1]; 94 95 const bool restrictedGPR16Range; 96}; 97 98void 99RegisterSet::reset(DataFile f, bool resetMax) 100{ 101 bits[f].fill(0); 102 if (resetMax) 103 fill[f] = -1; 104} 105 106void 107RegisterSet::init(const Target *targ) 108{ 109 for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) { 110 DataFile f = static_cast<DataFile>(rf); 111 last[rf] = targ->getFileSize(f) - 1; 112 unit[rf] = targ->getFileUnit(f); 113 fill[rf] = -1; 114 assert(last[rf] < MAX_REGISTER_FILE_SIZE); 115 bits[rf].allocate(last[rf] + 1, true); 116 } 117} 118 119RegisterSet::RegisterSet(const Target *targ) 120 : restrictedGPR16Range(targ->getChipset() < 0xc0) 121{ 122 init(targ); 123 for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i) 124 reset(static_cast<DataFile>(i)); 125} 126 127void 128RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock) 129{ 130 bits[f].periodicMask32(lock, unlock); 131} 132 133void 134RegisterSet::intersect(DataFile f, const RegisterSet *set) 135{ 136 bits[f] |= set->bits[f]; 137} 138 139void 140RegisterSet::print() const 141{ 142 INFO("GPR:"); 143 bits[FILE_GPR].print(); 144 INFO("\n"); 145} 146 147bool 148RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size) 149{ 150 reg = bits[f].findFreeRange(size); 151 if (reg < 0) 152 return false; 153 fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); 154 return true; 155} 156 157bool 158RegisterSet::occupy(const Value *v) 159{ 160 return occupy(v->reg.file, v->reg.data.id, v->reg.size >> unit[v->reg.file]); 161} 162 163void 164RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask) 165{ 166 bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32)); 167} 168 169bool 170RegisterSet::occupy(DataFile f, int32_t reg, unsigned int 
size) 171{ 172 if (bits[f].testRange(reg, size)) 173 return false; 174 175 bits[f].setRange(reg, size); 176 177 INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size); 178 179 fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); 180 181 return true; 182} 183 184void 185RegisterSet::release(DataFile f, int32_t reg, unsigned int size) 186{ 187 bits[f].clrRange(reg, size); 188 189 INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size); 190} 191 192class RegAlloc 193{ 194public: 195 RegAlloc(Program *program) : prog(program), sequence(0) { } 196 197 bool exec(); 198 bool execFunc(); 199 200private: 201 class PhiMovesPass : public Pass { 202 private: 203 virtual bool visit(BasicBlock *); 204 inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p); 205 }; 206 207 class ArgumentMovesPass : public Pass { 208 private: 209 virtual bool visit(BasicBlock *); 210 }; 211 212 class BuildIntervalsPass : public Pass { 213 private: 214 virtual bool visit(BasicBlock *); 215 void collectLiveValues(BasicBlock *); 216 void addLiveRange(Value *, const BasicBlock *, int end); 217 }; 218 219 class InsertConstraintsPass : public Pass { 220 public: 221 bool exec(Function *func); 222 private: 223 virtual bool visit(BasicBlock *); 224 225 bool insertConstraintMoves(); 226 227 void condenseDefs(Instruction *); 228 void condenseSrcs(Instruction *, const int first, const int last); 229 230 void addHazard(Instruction *i, const ValueRef *src); 231 void textureMask(TexInstruction *); 232 void addConstraint(Instruction *, int s, int n); 233 bool detectConflict(Instruction *, int s); 234 235 // target specific functions, TODO: put in subclass or Target 236 void texConstraintNV50(TexInstruction *); 237 void texConstraintNVC0(TexInstruction *); 238 void texConstraintNVE0(TexInstruction *); 239 240 std::list<Instruction *> constrList; 241 242 const Target *targ; 243 }; 244 245 bool buildLiveSets(BasicBlock *); 246 247private: 248 Program *prog; 249 Function *func; 250 251 // 
instructions in control flow / chronological order 252 ArrayList insns; 253 254 int sequence; // for manual passes through CFG 255}; 256 257typedef std::pair<Value *, Value *> ValuePair; 258 259class SpillCodeInserter 260{ 261public: 262 SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { } 263 264 bool run(const std::list<ValuePair>&); 265 266 Symbol *assignSlot(const Interval&, unsigned int size); 267 inline int32_t getStackSize() const { return stackSize; } 268 269private: 270 Function *func; 271 272 struct SpillSlot 273 { 274 Interval occup; 275 std::list<Value *> residents; // needed to recalculate occup 276 Symbol *sym; 277 int32_t offset; 278 inline uint8_t size() const { return sym->reg.size; } 279 }; 280 std::list<SpillSlot> slots; 281 int32_t stackSize; 282 int32_t stackBase; 283 284 LValue *unspill(Instruction *usei, LValue *, Value *slot); 285 void spill(Instruction *defi, Value *slot, LValue *); 286}; 287 288void 289RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, 290 const BasicBlock *bb, 291 int end) 292{ 293 Instruction *insn = val->getUniqueInsn(); 294 295 if (!insn) 296 insn = bb->getFirst(); 297 298 assert(bb->getFirst()->serial <= bb->getExit()->serial); 299 assert(bb->getExit()->serial + 1 >= end); 300 301 int begin = insn->serial; 302 if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial) 303 begin = bb->getEntry()->serial; 304 305 INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n", 306 val->id, begin, insn->serial, end); 307 308 if (begin != end) // empty ranges are only added as hazards for fixed regs 309 val->livei.extend(begin, end); 310} 311 312bool 313RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p) 314{ 315 if (b->cfg.incidentCount() <= 1) 316 return false; 317 318 int n = 0; 319 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next()) 320 if (ei.getType() == Graph::Edge::TREE || 321 ei.getType() == Graph::Edge::FORWARD) 322 ++n; 323 return 
(n == 2); 324} 325 326// For each operand of each PHI in b, generate a new value by inserting a MOV 327// at the end of the block it is coming from and replace the operand with its 328// result. This eliminates liveness conflicts and enables us to let values be 329// copied to the right register if such a conflict exists nonetheless. 330// 331// These MOVs are also crucial in making sure the live intervals of phi srces 332// are extended until the end of the loop, since they are not included in the 333// live-in sets. 334bool 335RegAlloc::PhiMovesPass::visit(BasicBlock *bb) 336{ 337 Instruction *phi, *mov; 338 BasicBlock *pb, *pn; 339 340 std::stack<BasicBlock *> stack; 341 342 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 343 pb = BasicBlock::get(ei.getNode()); 344 assert(pb); 345 if (needNewElseBlock(bb, pb)) 346 stack.push(pb); 347 } 348 while (!stack.empty()) { 349 pb = stack.top(); 350 pn = new BasicBlock(func); 351 stack.pop(); 352 353 pb->cfg.detach(&bb->cfg); 354 pb->cfg.attach(&pn->cfg, Graph::Edge::TREE); 355 pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); 356 357 assert(pb->getExit()->op != OP_CALL); 358 if (pb->getExit()->asFlow()->target.bb == bb) 359 pb->getExit()->asFlow()->target.bb = pn; 360 } 361 362 // insert MOVs (phi->src(j) should stem from j-th in-BB) 363 int j = 0; 364 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 365 pb = BasicBlock::get(ei.getNode()); 366 if (!pb->isTerminated()) 367 pb->insertTail(new_FlowInstruction(func, OP_BRA, bb)); 368 369 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { 370 mov = new_Instruction(func, OP_MOV, TYPE_U32); 371 372 mov->setSrc(0, phi->getSrc(j)); 373 mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue())); 374 phi->setSrc(j, mov->getDef(0)); 375 376 pb->insertBefore(pb->getExit(), mov); 377 } 378 ++j; 379 } 380 381 return true; 382} 383 384bool 385RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb) 386{ 387 // Bind function 
call inputs/outputs to the same physical register 388 // the callee uses, inserting moves as appropriate for the case a 389 // conflict arises. 390 for (Instruction *i = bb->getEntry(); i; i = i->next) { 391 FlowInstruction *cal = i->asFlow(); 392 if (!cal || cal->op != OP_CALL || cal->builtin) 393 continue; 394 RegisterSet clobberSet(prog->getTarget()); 395 396 // Bind input values. 397 for (int s = 0; cal->srcExists(s); ++s) { 398 LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue()); 399 tmp->reg.data.id = cal->target.fn->ins[s].rep()->reg.data.id; 400 401 Instruction *mov = 402 new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); 403 mov->setDef(0, tmp); 404 mov->setSrc(0, cal->getSrc(s)); 405 cal->setSrc(s, tmp); 406 407 bb->insertBefore(cal, mov); 408 } 409 410 // Bind output values. 411 for (int d = 0; cal->defExists(d); ++d) { 412 LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue()); 413 tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id; 414 415 Instruction *mov = 416 new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); 417 mov->setSrc(0, tmp); 418 mov->setDef(0, cal->getDef(d)); 419 cal->setDef(d, tmp); 420 421 bb->insertAfter(cal, mov); 422 clobberSet.occupy(tmp); 423 } 424 425 // Bind clobbered values. 426 for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin(); 427 it != cal->target.fn->clobbers.end(); 428 ++it) { 429 if (clobberSet.occupy(*it)) { 430 Value *tmp = new_LValue(func, (*it)->asLValue()); 431 tmp->reg.data.id = (*it)->reg.data.id; 432 cal->setDef(cal->defCount(), tmp); 433 } 434 } 435 } 436 437 // Update the clobber set of the function. 438 if (BasicBlock::get(func->cfgExit) == bb) { 439 func->buildDefSets(); 440 for (unsigned int i = 0; i < bb->defSet.getSize(); ++i) 441 if (bb->defSet.test(i)) 442 func->clobbers.push_back(func->getLValue(i)); 443 } 444 445 return true; 446} 447 448// Build the set of live-in variables of bb. 
449bool 450RegAlloc::buildLiveSets(BasicBlock *bb) 451{ 452 Function *f = bb->getFunction(); 453 BasicBlock *bn; 454 Instruction *i; 455 unsigned int s, d; 456 457 INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId()); 458 459 bb->liveSet.allocate(func->allLValues.getSize(), false); 460 461 int n = 0; 462 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 463 bn = BasicBlock::get(ei.getNode()); 464 if (bn == bb) 465 continue; 466 if (bn->cfg.visit(sequence)) 467 if (!buildLiveSets(bn)) 468 return false; 469 if (n++ || bb->liveSet.marker) 470 bb->liveSet |= bn->liveSet; 471 else 472 bb->liveSet = bn->liveSet; 473 } 474 if (!n && !bb->liveSet.marker) 475 bb->liveSet.fill(0); 476 bb->liveSet.marker = true; 477 478 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { 479 INFO("BB:%i live set of out blocks:\n", bb->getId()); 480 bb->liveSet.print(); 481 } 482 483 // if (!bb->getEntry()) 484 // return true; 485 486 if (bb == BasicBlock::get(f->cfgExit)) { 487 for (std::deque<ValueRef>::iterator it = f->outs.begin(); 488 it != f->outs.end(); ++it) { 489 assert(it->get()->asLValue()); 490 bb->liveSet.set(it->get()->id); 491 } 492 } 493 494 for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) { 495 for (d = 0; i->defExists(d); ++d) 496 bb->liveSet.clr(i->getDef(d)->id); 497 for (s = 0; i->srcExists(s); ++s) 498 if (i->getSrc(s)->asLValue()) 499 bb->liveSet.set(i->getSrc(s)->id); 500 } 501 for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next) 502 bb->liveSet.clr(i->getDef(0)->id); 503 504 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { 505 INFO("BB:%i live set after propagation:\n", bb->getId()); 506 bb->liveSet.print(); 507 } 508 509 return true; 510} 511 512void 513RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb) 514{ 515 BasicBlock *bbA = NULL, *bbB = NULL; 516 517 if (bb->cfg.outgoingCount()) { 518 // trickery to save a loop of OR'ing liveSets 519 // aliasing works fine with BitSet::setOr 520 for 
(Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 521 if (ei.getType() == Graph::Edge::DUMMY) 522 continue; 523 if (bbA) { 524 bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet); 525 bbA = bb; 526 } else { 527 bbA = bbB; 528 } 529 bbB = BasicBlock::get(ei.getNode()); 530 } 531 bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL); 532 } else 533 if (bb->cfg.incidentCount()) { 534 bb->liveSet.fill(0); 535 } 536} 537 538bool 539RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) 540{ 541 collectLiveValues(bb); 542 543 INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId()); 544 545 // go through out blocks and delete phi sources that do not originate from 546 // the current block from the live set 547 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 548 BasicBlock *out = BasicBlock::get(ei.getNode()); 549 550 for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) { 551 bb->liveSet.clr(i->getDef(0)->id); 552 553 for (int s = 0; i->srcExists(s); ++s) { 554 assert(i->src(s).getInsn()); 555 if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? 
556 bb->liveSet.set(i->getSrc(s)->id); 557 else 558 bb->liveSet.clr(i->getSrc(s)->id); 559 } 560 } 561 } 562 563 // remaining live-outs are live until end 564 if (bb->getExit()) { 565 for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j) 566 if (bb->liveSet.test(j)) 567 addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1); 568 } 569 570 for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) { 571 for (int d = 0; i->defExists(d); ++d) { 572 bb->liveSet.clr(i->getDef(d)->id); 573 if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs 574 i->getDef(d)->livei.extend(i->serial, i->serial); 575 } 576 577 for (int s = 0; i->srcExists(s); ++s) { 578 if (!i->getSrc(s)->asLValue()) 579 continue; 580 if (!bb->liveSet.test(i->getSrc(s)->id)) { 581 bb->liveSet.set(i->getSrc(s)->id); 582 addLiveRange(i->getSrc(s), bb, i->serial); 583 } 584 } 585 } 586 587 if (bb == BasicBlock::get(func->cfg.getRoot())) { 588 for (std::deque<ValueDef>::iterator it = func->ins.begin(); 589 it != func->ins.end(); ++it) { 590 if (it->get()->reg.data.id >= 0) // add hazard for fixed regs 591 it->get()->livei.extend(0, 1); 592 } 593 } 594 595 return true; 596} 597 598 599#define JOIN_MASK_PHI (1 << 0) 600#define JOIN_MASK_UNION (1 << 1) 601#define JOIN_MASK_MOV (1 << 2) 602#define JOIN_MASK_TEX (1 << 3) 603 604class GCRA 605{ 606public: 607 GCRA(Function *, SpillCodeInserter&); 608 ~GCRA(); 609 610 bool allocateRegisters(ArrayList& insns); 611 612 void printNodeInfo() const; 613 614private: 615 class RIG_Node : public Graph::Node 616 { 617 public: 618 RIG_Node(); 619 620 void init(const RegisterSet&, LValue *); 621 622 void addInterference(RIG_Node *); 623 void addRegPreference(RIG_Node *); 624 625 inline LValue *getValue() const 626 { 627 return reinterpret_cast<LValue *>(data); 628 } 629 inline void setValue(LValue *lval) { data = lval; } 630 631 inline uint8_t getCompMask() const 632 { 633 return ((1 << colors) - 1) << (reg & 7); 634 } 635 636 static inline 
RIG_Node *get(const Graph::EdgeIterator& ei) 637 { 638 return static_cast<RIG_Node *>(ei.getNode()); 639 } 640 641 public: 642 uint32_t degree; 643 uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable 644 uint16_t colors; 645 646 DataFile f; 647 int32_t reg; 648 649 float weight; 650 651 // list pointers for simplify() phase 652 RIG_Node *next; 653 RIG_Node *prev; 654 655 // union of the live intervals of all coalesced values (we want to retain 656 // the separate intervals for testing interference of compound values) 657 Interval livei; 658 659 std::list<RIG_Node *> prefRegs; 660 }; 661 662private: 663 inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; } 664 665 void buildRIG(ArrayList&); 666 bool coalesce(ArrayList&); 667 bool doCoalesce(ArrayList&, unsigned int mask); 668 void calculateSpillWeights(); 669 void simplify(); 670 bool selectRegisters(); 671 void cleanup(const bool success); 672 673 void simplifyEdge(RIG_Node *, RIG_Node *); 674 void simplifyNode(RIG_Node *); 675 676 bool coalesceValues(Value *, Value *, bool force); 677 void resolveSplitsAndMerges(); 678 void makeCompound(Instruction *, bool isSplit); 679 680 inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&); 681 682 inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *); 683 void checkList(std::list<RIG_Node *>&); 684 685private: 686 std::stack<uint32_t> stack; 687 688 // list headers for simplify() phase 689 RIG_Node lo[2]; 690 RIG_Node hi; 691 692 Graph RIG; 693 RIG_Node *nodes; 694 unsigned int nodeCount; 695 696 Function *func; 697 Program *prog; 698 699 static uint8_t relDegree[17][17]; 700 701 RegisterSet regs; 702 703 // need to fixup register id for participants of OP_MERGE/SPLIT 704 std::list<Instruction *> merges; 705 std::list<Instruction *> splits; 706 707 SpillCodeInserter& spill; 708 std::list<ValuePair> mustSpill; 709}; 710 711uint8_t GCRA::relDegree[17][17]; 712 713GCRA::RIG_Node::RIG_Node() : Node(NULL), 
next(this), prev(this) 714{ 715 colors = 0; 716} 717 718void 719GCRA::printNodeInfo() const 720{ 721 for (unsigned int i = 0; i < nodeCount; ++i) { 722 if (!nodes[i].colors) 723 continue; 724 INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X", 725 i, 726 nodes[i].f,nodes[i].reg,nodes[i].colors, 727 nodes[i].weight, 728 nodes[i].degree, nodes[i].degreeLimit); 729 730 for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next()) 731 INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); 732 for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next()) 733 INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); 734 INFO("\n"); 735 } 736} 737 738void 739GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) 740{ 741 setValue(lval); 742 if (lval->reg.data.id >= 0) 743 lval->noSpill = lval->fixedReg = 1; 744 745 colors = regs.units(lval->reg.file, lval->reg.size); 746 f = lval->reg.file; 747 reg = -1; 748 if (lval->reg.data.id >= 0) 749 reg = regs.idToUnits(lval); 750 751 weight = std::numeric_limits<float>::infinity(); 752 degree = 0; 753 degreeLimit = regs.getFileSize(f, lval->reg.size); 754 755 livei.insert(lval->livei); 756} 757 758bool 759GCRA::coalesceValues(Value *dst, Value *src, bool force) 760{ 761 LValue *rep = dst->join->asLValue(); 762 LValue *val = src->join->asLValue(); 763 764 if (!force && val->reg.data.id >= 0) { 765 rep = src->join->asLValue(); 766 val = dst->join->asLValue(); 767 } 768 RIG_Node *nRep = &nodes[rep->id]; 769 RIG_Node *nVal = &nodes[val->id]; 770 771 if (src->reg.file != dst->reg.file) { 772 if (!force) 773 return false; 774 WARN("forced coalescing of values in different files !\n"); 775 } 776 if (!force && dst->reg.size != src->reg.size) 777 return false; 778 779 if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) { 780 if (force) { 781 if (val->reg.data.id >= 0) 782 WARN("forced coalescing of values in different fixed regs !\n"); 783 } else { 784 if (val->reg.data.id >= 0) 785 return 
false; 786 // make sure that there is no overlap with the fixed register of rep 787 for (ArrayList::Iterator it = func->allLValues.iterator(); 788 !it.end(); it.next()) { 789 Value *reg = reinterpret_cast<Value *>(it.get())->asLValue(); 790 assert(reg); 791 if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei)) 792 return false; 793 } 794 } 795 } 796 797 if (!force && nRep->livei.overlaps(nVal->livei)) 798 return false; 799 800 INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n", 801 rep->id, rep->reg.data.id, val->id); 802 803 // set join pointer of all values joined with val 804 for (Value::DefIterator def = val->defs.begin(); def != val->defs.end(); 805 ++def) 806 (*def)->get()->join = rep; 807 assert(rep->join == rep && val->join == rep); 808 809 // add val's definitions to rep and extend the live interval of its RIG node 810 rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); 811 nRep->livei.unify(nVal->livei); 812 return true; 813} 814 815bool 816GCRA::coalesce(ArrayList& insns) 817{ 818 bool ret = doCoalesce(insns, JOIN_MASK_PHI); 819 if (!ret) 820 return false; 821 switch (func->getProgram()->getTarget()->getChipset() & ~0xf) { 822 case 0x50: 823 case 0x80: 824 case 0x90: 825 case 0xa0: 826 ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX); 827 break; 828 case 0xc0: 829 case 0xd0: 830 case 0xe0: 831 ret = doCoalesce(insns, JOIN_MASK_UNION); 832 break; 833 default: 834 break; 835 } 836 if (!ret) 837 return false; 838 return doCoalesce(insns, JOIN_MASK_MOV); 839} 840 841static inline uint8_t makeCompMask(int compSize, int base, int size) 842{ 843 uint8_t m = ((1 << size) - 1) << base; 844 845 switch (compSize) { 846 case 1: 847 return 0xff; 848 case 2: 849 m |= (m << 2); 850 return (m << 4) | m; 851 case 3: 852 case 4: 853 return (m << 4) | m; 854 default: 855 assert(compSize <= 8); 856 return m; 857 } 858} 859 860static inline void copyCompound(Value *dst, Value *src) 861{ 862 LValue *ldst = dst->asLValue(); 863 
LValue *lsrc = src->asLValue(); 864 865 ldst->compound = lsrc->compound; 866 ldst->compMask = lsrc->compMask; 867} 868 869void 870GCRA::makeCompound(Instruction *insn, bool split) 871{ 872 LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue(); 873 874 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { 875 INFO("makeCompound(split = %i): ", split); 876 insn->print(); 877 } 878 879 const unsigned int size = getNode(rep)->colors; 880 unsigned int base = 0; 881 882 if (!rep->compound) 883 rep->compMask = 0xff; 884 rep->compound = 1; 885 886 for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) { 887 LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue(); 888 889 val->compound = 1; 890 if (!val->compMask) 891 val->compMask = 0xff; 892 val->compMask &= makeCompMask(size, base, getNode(val)->colors); 893 assert(val->compMask); 894 895 INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n", 896 rep->id, rep->compMask, val->id, val->compMask); 897 898 base += getNode(val)->colors; 899 } 900 assert(base == size); 901} 902 903bool 904GCRA::doCoalesce(ArrayList& insns, unsigned int mask) 905{ 906 int c, n; 907 908 for (n = 0; n < insns.getSize(); ++n) { 909 Instruction *i; 910 Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n)); 911 912 switch (insn->op) { 913 case OP_PHI: 914 if (!(mask & JOIN_MASK_PHI)) 915 break; 916 for (c = 0; insn->srcExists(c); ++c) 917 if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) { 918 // this is bad 919 ERROR("failed to coalesce phi operands\n"); 920 return false; 921 } 922 break; 923 case OP_UNION: 924 case OP_MERGE: 925 if (!(mask & JOIN_MASK_UNION)) 926 break; 927 for (c = 0; insn->srcExists(c); ++c) 928 coalesceValues(insn->getDef(0), insn->getSrc(c), true); 929 if (insn->op == OP_MERGE) { 930 merges.push_back(insn); 931 if (insn->srcExists(1)) 932 makeCompound(insn, false); 933 } 934 break; 935 case OP_SPLIT: 936 if (!(mask & JOIN_MASK_UNION)) 937 break; 
938 splits.push_back(insn); 939 for (c = 0; insn->defExists(c); ++c) 940 coalesceValues(insn->getSrc(0), insn->getDef(c), true); 941 makeCompound(insn, true); 942 break; 943 case OP_MOV: 944 if (!(mask & JOIN_MASK_MOV)) 945 break; 946 i = NULL; 947 if (!insn->getDef(0)->uses.empty()) 948 i = insn->getDef(0)->uses.front()->getInsn(); 949 // if this is a contraint-move there will only be a single use 950 if (i && i->op == OP_MERGE) // do we really still need this ? 951 break; 952 i = insn->getSrc(0)->getUniqueInsn(); 953 if (i && !i->constrainedDefs()) { 954 if (coalesceValues(insn->getDef(0), insn->getSrc(0), false)) 955 copyCompound(insn->getSrc(0), insn->getDef(0)); 956 } 957 break; 958 case OP_TEX: 959 case OP_TXB: 960 case OP_TXL: 961 case OP_TXF: 962 case OP_TXQ: 963 case OP_TXD: 964 case OP_TXG: 965 case OP_TEXCSAA: 966 if (!(mask & JOIN_MASK_TEX)) 967 break; 968 for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c) 969 coalesceValues(insn->getDef(c), insn->getSrc(c), true); 970 break; 971 default: 972 break; 973 } 974 } 975 return true; 976} 977 978void 979GCRA::RIG_Node::addInterference(RIG_Node *node) 980{ 981 this->degree += relDegree[node->colors][colors]; 982 node->degree += relDegree[colors][node->colors]; 983 984 this->attach(node, Graph::Edge::CROSS); 985} 986 987void 988GCRA::RIG_Node::addRegPreference(RIG_Node *node) 989{ 990 prefRegs.push_back(node); 991} 992 993GCRA::GCRA(Function *fn, SpillCodeInserter& spill) : 994 func(fn), 995 regs(fn->getProgram()->getTarget()), 996 spill(spill) 997{ 998 prog = func->getProgram(); 999 1000 // initialize relative degrees array - i takes away from j 1001 for (int i = 1; i <= 16; ++i) 1002 for (int j = 1; j <= 16; ++j) 1003 relDegree[i][j] = j * ((i + j - 1) / j); 1004} 1005 1006GCRA::~GCRA() 1007{ 1008 if (nodes) 1009 delete[] nodes; 1010} 1011 1012void 1013GCRA::checkList(std::list<RIG_Node *>& lst) 1014{ 1015 GCRA::RIG_Node *prev = NULL; 1016 1017 for (std::list<RIG_Node *>::iterator it = lst.begin(); 
1018 it != lst.end(); 1019 ++it) { 1020 assert((*it)->getValue()->join == (*it)->getValue()); 1021 if (prev) 1022 assert(prev->livei.begin() <= (*it)->livei.begin()); 1023 prev = *it; 1024 } 1025} 1026 1027void 1028GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node) 1029{ 1030 if (node->livei.isEmpty()) 1031 return; 1032 // only the intervals of joined values don't necessarily arrive in order 1033 std::list<RIG_Node *>::iterator prev, it; 1034 for (it = list.end(); it != list.begin(); it = prev) { 1035 prev = it; 1036 --prev; 1037 if ((*prev)->livei.begin() <= node->livei.begin()) 1038 break; 1039 } 1040 list.insert(it, node); 1041} 1042 1043void 1044GCRA::buildRIG(ArrayList& insns) 1045{ 1046 std::list<RIG_Node *> values, active; 1047 1048 for (std::deque<ValueDef>::iterator it = func->ins.begin(); 1049 it != func->ins.end(); ++it) 1050 insertOrderedTail(values, getNode(it->get()->asLValue())); 1051 1052 for (int i = 0; i < insns.getSize(); ++i) { 1053 Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i)); 1054 for (int d = 0; insn->defExists(d); ++d) 1055 if (insn->getDef(d)->rep() == insn->getDef(d)) 1056 insertOrderedTail(values, getNode(insn->getDef(d)->asLValue())); 1057 } 1058 checkList(values); 1059 1060 while (!values.empty()) { 1061 RIG_Node *cur = values.front(); 1062 1063 for (std::list<RIG_Node *>::iterator it = active.begin(); 1064 it != active.end(); 1065 ++it) { 1066 RIG_Node *node = *it; 1067 1068 if (node->livei.end() <= cur->livei.begin()) { 1069 it = active.erase(it); 1070 --it; 1071 } else 1072 if (node->f == cur->f && node->livei.overlaps(cur->livei)) { 1073 cur->addInterference(node); 1074 } 1075 } 1076 values.pop_front(); 1077 active.push_back(cur); 1078 } 1079} 1080 1081void 1082GCRA::calculateSpillWeights() 1083{ 1084 for (unsigned int i = 0; i < nodeCount; ++i) { 1085 RIG_Node *const n = &nodes[i]; 1086 if (!nodes[i].colors || nodes[i].livei.isEmpty()) 1087 continue; 1088 if (nodes[i].reg >= 0) { 1089 // 
update max reg
         // (tail of GCRA::calculateSpillWeights — the function header is above
         // this chunk)  Pre-colored node: just reserve its registers so the
         // maximum-assigned bookkeeping stays correct; it never enters
         // simplify/select.
         regs.occupy(n->f, n->reg, n->colors);
         continue;
      }
      LValue *val = nodes[i].getValue();

      // Spill weight: (refcount^2 / live-range length). Values marked noSpill
      // keep their default weight (presumably +inf — TODO confirm), so they
      // are never chosen as spill candidates below.
      if (!val->noSpill) {
         int rc = 0;
         for (Value::DefIterator it = val->defs.begin();
              it != val->defs.end();
              ++it)
            rc += (*it)->get()->refCount();

         nodes[i].weight =
            (float)rc * (float)rc / (float)nodes[i].livei.extent();
      }

      // Partition into low-degree (trivially colorable) and high-degree
      // worklists; lo[1] holds "wide" values (> 4 bytes, multi-register).
      if (nodes[i].degree < nodes[i].degreeLimit) {
         int l = 0;
         if (val->reg.size > 4)
            l = 1;
         DLLIST_ADDHEAD(&lo[l], &nodes[i]);
      } else {
         DLLIST_ADDHEAD(&hi, &nodes[i]);
      }
   }
   if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
      printNodeInfo();
}

// Remove the a<->b interference edge's contribution to b's degree, and move
// b from the high-degree to the low-degree list if it just became trivially
// colorable. Called while node a is being simplified (removed from the RIG).
void
GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b)
{
   // remember whether b was in the high-degree list before the update
   bool move = b->degree >= b->degreeLimit;

   INFO_DBG(prog->dbgFlags, REG_ALLOC,
            "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n",
            a->getValue()->id, a->degree, a->degreeLimit,
            b->getValue()->id, b->degree, b->degreeLimit);

   // relDegree weights the edge by the register counts of both nodes
   b->degree -= relDegree[a->colors][b->colors];

   // only move if it crossed the threshold with this update
   move = move && b->degree < b->degreeLimit;
   if (move && !DLLIST_EMPTY(b)) {
      int l = (b->getValue()->reg.size > 4) ? 1 : 0;
      DLLIST_DEL(b);
      DLLIST_ADDTAIL(&lo[l], b);
   }
}

// Remove a node from the interference graph worklists and push it on the
// select stack; update the degrees of all of its neighbors.
void
GCRA::simplifyNode(RIG_Node *node)
{
   for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
      simplifyEdge(node, RIG_Node::get(ei));

   for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
      simplifyEdge(node, RIG_Node::get(ei));

   DLLIST_DEL(node);
   stack.push(node->getValue()->id);

   INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n",
            node->getValue()->id,
            (node->degree < node->degreeLimit) ? "" : "(spill)");
}

// SIMPLIFY phase of Chaitin/Briggs-style graph coloring: repeatedly remove
// trivially colorable nodes; when only high-degree nodes remain, optimistically
// push the one with the lowest (weight / degree) score as a spill candidate.
void
GCRA::simplify()
{
   for (;;) {
      if (!DLLIST_EMPTY(&lo[0])) {
         // drain small (single-register) low-degree nodes first
         do {
            simplifyNode(lo[0].next);
         } while (!DLLIST_EMPTY(&lo[0]));
      } else
      if (!DLLIST_EMPTY(&lo[1])) {
         // wide values one at a time — removing one may free small ones
         simplifyNode(lo[1].next);
      } else
      if (!DLLIST_EMPTY(&hi)) {
         RIG_Node *best = hi.next;
         float bestScore = best->weight / (float)best->degree;
         // spill candidate
         for (RIG_Node *it = best->next; it != &hi; it = it->next) {
            float score = it->weight / (float)it->degree;
            if (score < bestScore) {
               best = it;
               bestScore = score;
            }
         }
         // an infinite score means every remaining node is noSpill
         if (isinf(bestScore)) {
            ERROR("no viable spill candidates left\n");
            break;
         }
         simplifyNode(best);
      } else {
         break;
      }
   }
}

// Mark the registers of an already-colored neighbor (reached via edge ei) as
// occupied. For "compound" values (elements of a coalesced register tuple)
// only the sub-registers whose element live ranges actually overlap are
// blocked, via a per-byte/lane mask relative to the tuple base (reg & ~7).
void
GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei)
{
   const RIG_Node *intf = RIG_Node::get(ei);

   if (intf->reg < 0)
      return;       // neighbor not colored yet, nothing to block
   const LValue *vA = node->getValue();
   const LValue *vB = intf->getValue();

   const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7);

   if (vA->compound | vB->compound) {
      // NOTE: this only works for >aligned< register tuples !
      for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) {
         for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) {
            const LValue *vD = (*D)->get()->asLValue();
            const LValue *vd = (*d)->get()->asLValue();

            // pairs of tuple elements that never coexist don't interfere
            if (!vD->livei.overlaps(vd->livei)) {
               INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n",
                        vD->id, vd->id);
               continue;
            }

            uint8_t mask = vD->compound ? vD->compMask : ~0;
            if (vd->compound) {
               assert(vB->compound);
               mask &= vd->compMask & vB->compMask;
            } else {
               mask &= intfMask;
            }

            INFO_DBG(prog->dbgFlags, REG_ALLOC,
                     "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n",
                     vD->id,
                     vD->compound ? vD->compMask : 0xff,
                     vd->id,
                     vd->compound ? vd->compMask : intfMask,
                     vB->compMask, intf->reg & ~7, mask);
            if (mask)
               regs.occupyMask(node->f, intf->reg & ~7, mask);
         }
      }
   } else {
      // simple case: block the neighbor's full register range
      INFO_DBG(prog->dbgFlags, REG_ALLOC,
               "(%%%i) X (%%%i): $r%i + %u\n",
               vA->id, vB->id, intf->reg, intf->colors);
      regs.occupy(node->f, intf->reg, intf->colors);
   }
}

// SELECT phase: pop nodes in reverse simplification order, block the registers
// of already-colored neighbors, then try preferred registers (coalescing
// hints) before falling back to first-fit assignment. Returns false if any
// value could not be colored; those are queued in mustSpill (with a stack
// slot pre-assigned for GPRs) for the spill pass.
bool
GCRA::selectRegisters()
{
   INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n");

   while (!stack.empty()) {
      RIG_Node *node = &nodes[stack.top()];
      stack.pop();

      regs.reset(node->f);

      INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n",
               node->getValue()->id, node->colors);

      for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next())
         checkInterference(node, ei);
      for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next())
         checkInterference(node, ei);

      // try to reuse a preferred neighbor's register (move elimination)
      if (!node->prefRegs.empty()) {
         for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin();
              it != node->prefRegs.end();
              ++it) {
            if ((*it)->reg >= 0 &&
                regs.occupy(node->f, (*it)->reg, node->colors)) {
               node->reg = (*it)->reg;
               break;
            }
         }
      }
      if (node->reg >= 0)
         continue;
      LValue *lval = node->getValue();
      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
         regs.print();
      bool ret = regs.assign(node->reg, node->f, node->colors);
      if (ret) {
         INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg);
         lval->compMask = node->getCompMask();
      } else {
         INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n",
                  lval->id, lval->reg.size);
         Symbol *slot = NULL;
         // only GPRs spill to local memory; others get a NULL slot and are
         // spilled to a GPR by SpillCodeInserter instead
         if (lval->reg.file == FILE_GPR)
            slot = spill.assignSlot(node->livei, lval->reg.size);
         mustSpill.push_back(ValuePair(lval, slot));
      }
   }
   if (!mustSpill.empty())
      return false;
   // commit colors to the values' register ids
   for (unsigned int i = 0; i < nodeCount; ++i) {
      LValue *lval = nodes[i].getValue();
      if (nodes[i].reg >= 0 && nodes[i].colors > 0)
         lval->reg.data.id =
            regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size);
   }
   return true;
}

// Top-level driver for one coloring attempt: build RIG nodes for all LValues,
// coalesce, build interference edges, simplify, select. On failure the
// mustSpill list is handed to the spill-code inserter and the caller retries.
bool
GCRA::allocateRegisters(ArrayList& insns)
{
   bool ret;

   INFO_DBG(prog->dbgFlags, REG_ALLOC,
            "allocateRegisters to %u instructions\n", insns.getSize());

   nodeCount = func->allLValues.getSize();
   nodes = new RIG_Node[nodeCount];
   if (!nodes)
      return false;
   for (unsigned int i = 0; i < nodeCount; ++i) {
      LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i));
      if (lval) {
         nodes[i].init(regs, lval);
         RIG.insert(&nodes[i]);
      }
   }

   // coalesce first, we use only 1 RIG node for a group of joined values
   ret = coalesce(insns);
   if (!ret)
      goto out;

   if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
      func->printLiveIntervals();

   buildRIG(insns);
   calculateSpillWeights();
   simplify();

   ret = selectRegisters();
   if (!ret) {
      INFO_DBG(prog->dbgFlags, REG_ALLOC,
               "selectRegisters failed, inserting spill code ...\n");
      regs.reset(FILE_GPR, true);
      spill.run(mustSpill);
      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
         func->print();
   } else {
      prog->maxGPR = regs.getMaxAssigned(FILE_GPR);
   }

out:
   // always runs — frees the RIG nodes and resets per-attempt value state
   cleanup(ret);
   return ret;
}

// Per-attempt teardown. On success, propagate the coalesced representative's
// register id to joined values and fix up split/merge results; on failure,
// un-join values so the next attempt starts from a clean slate.
void
GCRA::cleanup(const bool success)
{
   mustSpill.clear();

   for (ArrayList::Iterator it = func->allLValues.iterator();
        !it.end(); it.next()) {
      LValue *lval = reinterpret_cast<LValue *>(it.get());

      lval->livei.clear();

      lval->compound = 0;
      lval->compMask = 0;

      if (lval->join == lval)
         continue;

      if (success) {
         lval->reg.data.id = lval->join->reg.data.id;
      } else {
         // detach this value's defs from the join representative again
         for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
              ++d)
            lval->join->defs.remove(*d);
         lval->join = lval;
      }
   }

   if (success)
      resolveSplitsAndMerges();
   splits.clear(); // avoid duplicate entries on next coalesce pass
   merges.clear();

   delete[] nodes;
   nodes = NULL;
}

// Find (or create) a local-memory stack slot of the given size whose current
// occupancy does not overlap livei. Existing slot entries are kept sorted by
// offset in 'slots'; a slot of a different size can be reused by overlapping
// entries as long as their live ranges don't conflict.
Symbol *
SpillCodeInserter::assignSlot(const Interval &livei, unsigned int size)
{
   SpillSlot slot;
   int32_t offsetBase = stackSize;
   int32_t offset;
   // pos is always slots.end(): new slots are appended at the top of stack
   std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin();

   // align the search start to the slot size
   if (offsetBase % size)
      offsetBase += size - (offsetBase % size);

   slot.sym = NULL;

   for (offset = offsetBase; offset < stackSize; offset += size) {
      while (it != slots.end() && it->offset < offset)
         ++it;
      if (it == slots.end()) // no slots left
         break;
      std::list<SpillSlot>::iterator bgn = it;

      while (it != slots.end() && it->offset < (offset + size)) {
         // NOTE(review): this unconditional occup.print() looks like debug
         // output left in — it runs for every candidate slot on every
         // assignment; confirm whether it should be removed or gated on
         // NV50_IR_DEBUG_REG_ALLOC.
         it->occup.print();
         if (it->occup.overlaps(livei))
            break;
         ++it;
      }
      if (it == slots.end() || it->offset >= (offset + size)) {
         // fits
         for (; bgn != slots.end() && bgn->offset < (offset + size); ++bgn) {
            bgn->occup.insert(livei);
            if (bgn->size() == size)
               slot.sym = bgn->sym;
         }
         break;
      }
   }
   if (!slot.sym) {
      // no reusable slot: grow the stack frame and create a new symbol
      stackSize = offset + size;
      slot.offset = offset;
      slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL);
      // without a stack pointer, addresses are absolute within TLS
      if (!func->stackPtr)
         offset += func->tlsBase;
      slot.sym->setAddress(NULL, offset);
      slot.sym->reg.size = size;
      slots.insert(pos, slot)->occup.insert(livei);
   }
   return slot.sym;
}

// Insert a store (or a move, when spilling to a register file) of lval into
// its spill slot right after the defining instruction.
void
SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
{
   const DataType ty = typeOfSize(slot->reg.size);

   Instruction *st;
   if (slot->reg.file == FILE_MEMORY_LOCAL) {
      st = new_Instruction(func, OP_STORE, ty);
      st->setSrc(0, slot);
      st->setSrc(1, lval);
      lval->noSpill = 1;   // don't spill the value feeding the spill store
   } else {
      // spill target is another register file: a converting move suffices
      st = new_Instruction(func, OP_CVT, ty);
      st->setDef(0, slot);
      st->setSrc(0, lval);
   }
   defi->bb->insertAfter(defi, st);
}

// Insert a reload of the spilled value right before the using instruction and
// return the fresh LValue clone that now carries the value.
LValue *
SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
{
   const DataType ty = typeOfSize(slot->reg.size);

   lval = cloneShallow(func, lval);

   Instruction *ld;
   if (slot->reg.file == FILE_MEMORY_LOCAL) {
      lval->noSpill = 1;   // the reload result must not be spilled again
      ld = new_Instruction(func, OP_LOAD, ty);
   } else {
      ld = new_Instruction(func, OP_CVT, ty);
   }
   ld->setDef(0, lval);
   ld->setSrc(0, slot);

   usei->bb->insertBefore(usei, ld);
   return lval;
}

// Rewrite every (def, use) of each value in lst to go through its spill slot:
// each use gets a reload, each def gets a store. PHI defs/uses are redirected
// to the slot itself (PHIs of memory are resolved later — TODO confirm).
bool
SpillCodeInserter::run(const std::list<ValuePair>& lst)
{
   for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end();
        ++it) {
      LValue *lval = it->first->asLValue();
      Symbol *mem = it->second ? it->second->asSym() : NULL;

      for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
           ++d) {
         // no memory slot assigned: spill into a fresh GPR instead
         Value *slot = mem ?
            static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
         Value *tmp = NULL;
         Instruction *last = NULL;

         LValue *dval = (*d)->get()->asLValue();
         Instruction *defi = (*d)->getInsn();

         // handle uses first or they'll contain the spill stores
         while (!dval->uses.empty()) {
            ValueRef *u = dval->uses.front();
            Instruction *usei = u->getInsn();
            assert(usei);
            if (usei->op == OP_PHI) {
               // PHIs read the slot directly (NULL for memory slots)
               tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot;
               last = NULL;
            } else
            if (!last || usei != last->next) { // TODO: sort uses
               // reload once per run of adjacent uses
               tmp = unspill(usei, dval, slot);
               last = usei;
            }
            u->set(tmp);
         }

         assert(defi);
         if (defi->op == OP_PHI) {
            // remove the PHI def of the spilled value
            // NOTE(review): '--d' after erase() is UB if the erased def was
            // defs.begin() — presumably PHI defs never come first here, but
            // that should be confirmed.
            d = lval->defs.erase(d);
            --d;
            if (slot->reg.file == FILE_MEMORY_LOCAL)
               delete_Instruction(func->getProgram(), defi);
            else
               defi->setDef(0, slot);
         } else {
            spill(defi, slot, dval);
         }
      }

   }

   // TODO: We're not trying to reuse old slots in a potential next iteration.
   // We have to update the slots' livei intervals to be able to do that.
   stackBase = stackSize;
   slots.clear();
   return true;
}

// Allocate registers for all functions in call-graph DFS order, stacking each
// function's TLS (spill) area after its callees'.
bool
RegAlloc::exec()
{
   for (IteratorRef it = prog->calls.iteratorDFS(false);
        !it->end(); it->next()) {
      func = Function::get(reinterpret_cast<Graph::Node *>(it->get()));

      func->tlsBase = prog->tlsSize;
      if (!execFunc())
         return false;
      prog->tlsSize += func->tlsSize;
   }
   return true;
}

// Run register allocation for a single function: insert constraints and
// phi/argument moves once, then iterate (liveness -> intervals -> GCRA) up to
// 3 times, inserting spill code after each failed coloring attempt.
bool
RegAlloc::execFunc()
{
   InsertConstraintsPass insertConstr;
   PhiMovesPass insertPhiMoves;
   ArgumentMovesPass insertArgMoves;
   BuildIntervalsPass buildIntervals;
   SpillCodeInserter insertSpills(func);

   GCRA gcra(func, insertSpills);

   unsigned int i, retries;
   bool ret;

   ret = insertConstr.exec(func);
   if (!ret)
      goto out;

   ret = insertPhiMoves.run(func);
   if (!ret)
      goto out;

   ret = insertArgMoves.run(func);
   if (!ret)
      goto out;

   // TODO: need to fix up spill slot usage ranges to support > 1 retry
   for (retries = 0; retries < 3; ++retries) {
      if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC))
         INFO("Retry: %i\n", retries);
      if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)
         func->print();

      // spilling to registers may add live ranges, need to rebuild everything
      ret = true;
      // iterate live-set construction once per loop nesting level to reach a
      // fixed point across back edges
      for (sequence = func->cfg.nextSequence(), i = 0;
           ret && i <= func->loopNestingBound;
           sequence = func->cfg.nextSequence(), ++i)
         ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
      if (!ret)
         break;
      func->orderInstructions(this->insns);

      ret = buildIntervals.run(func);
      if (!ret)
         break;
      ret = gcra.allocateRegisters(insns);
      if (ret)
         break; // success
   }
   INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret);

   func->tlsSize = insertSpills.getStackSize();
out:
   return ret;
}

// After successful coloring, give each component of recorded OP_SPLIT /
// OP_MERGE instructions its own register id at the proper byte offset from
// the tuple's base register, and un-join the components.
// TODO: check if modifying Instruction::join here breaks anything
void
GCRA::resolveSplitsAndMerges()
{
   for (std::list<Instruction *>::iterator it = splits.begin();
        it != splits.end();
        ++it) {
      Instruction *split = *it;
      unsigned int reg = regs.idToBytes(split->getSrc(0));
      for (int d = 0; split->defExists(d); ++d) {
         Value *v = split->getDef(d);
         v->reg.data.id = regs.bytesToId(v, reg);
         v->join = v;
         reg += v->reg.size;
      }
   }
   splits.clear();

   for (std::list<Instruction *>::iterator it = merges.begin();
        it != merges.end();
        ++it) {
      Instruction *merge = *it;
      unsigned int reg = regs.idToBytes(merge->getDef(0));
      for (int s = 0; merge->srcExists(s); ++s) {
         Value *v = merge->getSrc(s);
         v->reg.data.id = regs.bytesToId(v, reg);
         v->join = v;
         reg += v->reg.size;
      }
   }
   merges.clear();
}

// Program entry point for register allocation.
bool Program::registerAllocation()
{
   RegAlloc ra(this);
   return ra.exec();
}

// Run the constraint-insertion visitor over the function, then add the moves
// needed to resolve conflicting constraints.
bool
RegAlloc::InsertConstraintsPass::exec(Function *ir)
{
   constrList.clear();

   bool ret = run(ir, true, true);
   if (ret)
      ret = insertConstraintMoves();
   return ret;
}

// Drop texture result components that are never read: shrink tex.mask to the
// referenced components and compact the defs accordingly.
// TODO: make part of texture insn
void
RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex)
{
   Value *def[4];
   int c, k, d;
   uint8_t mask = 0;

   // c walks the 4 possible components, k the currently present defs,
   // d collects the defs that survive
   for (d = 0, k = 0, c = 0; c < 4; ++c) {
      if (!(tex->tex.mask & (1 << c)))
         continue;
      if (tex->getDef(k)->refCount()) {
         mask |= 1 << c;
         def[d++] = tex->getDef(k);
      }
      ++k;
   }
   tex->tex.mask = mask;

   for (c = 0; c < d; ++c)
      tex->setDef(c, def[c]);
   for (; c < 4; ++c)
      tex->setDef(c, NULL);
}

// Return true if source s of constraint op cst cannot simply be tied to the
// constraint and needs an extra move (insertConstraintMoves decides).
bool
RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s)
{
   Value *v = cst->getSrc(s);

   // current register allocation can't handle it if a value participates in
   // multiple constraints
   for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) {
      if (cst != (*it)->getInsn())
         return true;
   }

   // can start at s + 1 because detectConflict is called on all sources
   for (int c = s + 1; cst->srcExists(c); ++c)
      if (v == cst->getSrc(c))
         return true;

   Instruction *defi = v->getInsn();

   return (!defi || defi->constrainedDefs());
}

// Route sources [s, s + n) of instruction i through an OP_CONSTRAINT op so
// the allocator can force them into consecutive registers; identical existing
// constraints in dominating blocks are reused.
void
RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n)
{
   Instruction *cst;
   int d;

   // first, look for an existing identical constraint op
   for (std::list<Instruction *>::iterator it = constrList.begin();
        it != constrList.end();
        ++it) {
      cst = (*it);
      if (!i->bb->dominatedBy(cst->bb))
         break;
      for (d = 0; d < n; ++d)
         if (cst->getSrc(d) != i->getSrc(d + s))
            break;
      if (d >= n) {
         // full match: reuse its defs instead of making a new constraint
         for (d = 0; d < n; ++d, ++s)
            i->setSrc(s, cst->getDef(d));
         return;
      }
   }
   cst = new_Instruction(func, OP_CONSTRAINT, i->dType);

   for (d = 0; d < n; ++s, ++d) {
      cst->setDef(d, new_LValue(func, FILE_GPR));
      cst->setSrc(d, i->getSrc(s));
      i->setSrc(s, cst->getDef(d));
   }
   i->bb->insertBefore(i, cst);

   constrList.push_back(cst);
}

// Add a
dummy use of the pointer source of >= 8 byte loads after the load
// to prevent it from being assigned a register which overlapping the load's
// destination, which would produce random corruptions.
void
RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src)
{
   // the NOP's source keeps the pointer live until after the load completes
   Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE);
   hzd->setSrc(0, src->get());
   i->bb->insertAfter(i, hzd);

}

// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q
// Collapse an instruction's leading GPR defs into one wide value, splitting it
// back into the original components with an OP_SPLIT inserted after insn.
void
RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn)
{
   uint8_t size = 0;
   int n;
   // count leading GPR defs and total their size
   for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n)
      size += insn->getDef(n)->reg.size;
   if (n < 2)
      return;   // nothing to condense
   LValue *lval = new_LValue(func, FILE_GPR);
   lval->reg.size = size;

   Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size));
   split->setSrc(0, lval);
   for (int d = 0; d < n; ++d) {
      split->setDef(d, insn->getDef(d));
      insn->setDef(d, NULL);
   }
   insn->setDef(0, lval);

   // shift any remaining (non-GPR) defs down behind the wide def
   for (int k = 1, d = n; insn->defExists(d); ++d, ++k) {
      insn->setDef(k, insn->getDef(d));
      insn->setDef(d, NULL);
   }
   // carry over predicate if any (mainly for OP_UNION uses)
   split->setPredicate(insn->cc, insn->getPredicate());

   insn->bb->insertAfter(insn, split);
   constrList.push_back(split);
}
// Collapse sources [a, b] of insn into one wide value produced by an OP_MERGE
// inserted before insn; subsequent sources are shifted down to close the gap.
void
RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn,
                                              const int a, const int b)
{
   uint8_t size = 0;
   if (a >= b)
      return;   // need at least two sources to merge
   for (int s = a; s <= b; ++s)
      size += insn->getSrc(s)->reg.size;
   if (!size)
      return;
   LValue *lval = new_LValue(func, FILE_GPR);
   lval->reg.size = size;

   // temporarily detach predicate/extra sources so indices stay stable
   Value *save[3];
   insn->takeExtraSources(0, save);

   Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size));
   merge->setDef(0, lval);
   for (int s = a, i = 0; s <= b; ++s, ++i) {
      merge->setSrc(i, insn->getSrc(s));
      insn->setSrc(s, NULL);
   }
   insn->setSrc(a, lval);

   // shift trailing sources down into the vacated slots
   for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) {
      insn->setSrc(k, insn->getSrc(s));
      insn->setSrc(s, NULL);
   }
   insn->bb->insertBefore(insn, merge);

   insn->putExtraSources(0, save);

   constrList.push_back(merge);
}

// Apply NVE0 (Kepler) texture operand constraints: defs condensed, sources
// packed into (at most) two consecutive-register groups of up to 4.
void
RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
{
   textureMask(tex);
   condenseDefs(tex);

   int n = tex->srcCount(0xff, true);
   if (n > 4) {
      condenseSrcs(tex, 0, 3);
      if (n > 5) // NOTE: first call modified positions already
         condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
   } else
   if (n > 1) {
      condenseSrcs(tex, 0, n - 1);
   }
}

// Apply NVC0 (Fermi) texture operand constraints: the first s sources
// (layer/indirect handles and offsets) form one group, the n coordinate
// sources another; defs condensed last.
void
RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
{
   int n, s;

   textureMask(tex);

   if (tex->op == OP_TXQ) {
      s = tex->srcCount(0xff);
      n = 0;
   } else {
      s = tex->tex.target.getArgCount();
      // indirect resource/sampler handle rides in the first group
      if (!tex->tex.target.isArray() &&
          (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
         ++s;
      if (tex->op == OP_TXD && tex->tex.useOffsets)
         ++s;
      n = tex->srcCount(0xff) - s;
      assert(n <= 4);
   }

   if (s > 1)
      condenseSrcs(tex, 0, s - 1);
   if (n > 1) // NOTE: first call modified positions already
      condenseSrcs(tex, 1, n);

   condenseDefs(tex);
}

// Apply NV50 (Tesla) texture operand constraints: sources and defs must pair
// up in the same consecutive register quad, so pad whichever side is shorter
// with dummy values before condensing both.
void
RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex)
{
   // detach the predicate so it doesn't count as a source while padding
   Value *pred = tex->getPredicate();
   if (pred)
      tex->setPredicate(tex->cc, NULL);

   textureMask(tex);

   assert(tex->defExists(0) && tex->srcExists(0));
   // make src and def count match
   int c;
   for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) {
      if (!tex->srcExists(c))
         tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue()));
      if (!tex->defExists(c))
         tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue()));
   }
   if (pred)
      tex->setPredicate(tex->cc, pred);
   condenseDefs(tex);
   condenseSrcs(tex, 0, c - 1);
}

// Insert constraint markers for instructions whose multiple sources must be
// located in consecutive registers.
bool
RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
{
   TexInstruction *tex;
   Instruction *next;
   int s, size;

   targ = bb->getProgram()->getTarget();

   // 'next' is cached because the constraint helpers insert instructions
   // around i while we iterate
   for (Instruction *i = bb->getEntry(); i; i = next) {
      next = i->next;

      if ((tex = i->asTex())) {
         // dispatch on chip generation
         switch (targ->getChipset() & ~0xf) {
         case 0x50:
         case 0x80:
         case 0x90:
         case 0xa0:
            texConstraintNV50(tex);
            break;
         case 0xc0:
         case 0xd0:
            texConstraintNVC0(tex);
            break;
         case 0xe0:
            texConstraintNVE0(tex);
            break;
         default:
            break;
         }
      } else
      if (i->op == OP_EXPORT || i->op == OP_STORE) {
         // value operands (starting at src 1) must be contiguous
         for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) {
            assert(i->srcExists(s));
            size -= i->getSrc(s)->reg.size;
         }
         condenseSrcs(i, 1, s - 1);
      } else
      if (i->op == OP_LOAD || i->op == OP_VFETCH) {
         condenseDefs(i);
         if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
            addHazard(i, i->src(0).getIndirect(0));
      } else
      if (i->op == OP_UNION) {
         constrList.push_back(i);
      }
   }
   return true;
}

// Insert extra moves so that, if multiple register constraints on a value are
// in conflict, these conflicts can be resolved.
bool
RegAlloc::InsertConstraintsPass::insertConstraintMoves()
{
   for (std::list<Instruction *>::iterator it = constrList.begin();
        it != constrList.end();
        ++it) {
      Instruction *cst = *it;
      Instruction *mov;

      // NOTE: the '&& 0' deliberately disables this branch — OP_SPLIT
      // constraints currently get no extra moves.
      if (cst->op == OP_SPLIT && 0) {
         // spilling splits is annoying, just make sure they're separate
         for (int d = 0; cst->defExists(d); ++d) {
            if (!cst->getDef(d)->refCount())
               continue;
            LValue *lval = new_LValue(func, cst->def(d).getFile());
            const uint8_t size = cst->def(d).getSize();
            lval->reg.size = size;

            // route the split result through a fresh value:
            //   split -> lval -> (MOV) -> original def
            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
            mov->setSrc(0, lval);
            mov->setDef(0, cst->getDef(d));
            cst->setDef(d, mov->getSrc(0));
            cst->bb->insertAfter(cst, mov);

            cst->getSrc(0)->asLValue()->noSpill = 1;
            mov->getSrc(0)->asLValue()->noSpill = 1;
         }
      } else
      if (cst->op == OP_MERGE || cst->op == OP_UNION) {
         for (int s = 0; cst->srcExists(s); ++s) {
            const uint8_t size = cst->src(s).getSize();

            // undefined source: give it a dummy def via OP_NOP so every
            // merge/union input has a defining instruction
            if (!cst->getSrc(s)->defs.size()) {
               mov = new_Instruction(func, OP_NOP, typeOfSize(size));
               mov->setDef(0, cst->getSrc(s));
               cst->bb->insertBefore(cst, mov);
               continue;
            }
            assert(cst->getSrc(s)->defs.size() == 1); // still SSA

            Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
            // catch some cases where don't really need MOVs
            if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs())
               continue;

            // copy the source into a fresh value so the constraint can place
            // its own operand without conflicting with other users
            LValue *lval = new_LValue(func, cst->src(s).getFile());
            lval->reg.size = size;

            mov = new_Instruction(func, OP_MOV, typeOfSize(size));
            mov->setDef(0, lval);
            mov->setSrc(0, cst->getSrc(s));
            cst->setSrc(s, mov->getDef(0));
            cst->bb->insertBefore(cst, mov);

            cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help

            // a union selects one of several conditional defs: the copy must
            // inherit the producer's predicate
            if (cst->op == OP_UNION)
               mov->setPredicate(defi->cc, defi->getPredicate());
         }
      }
   }

   return true;
}

} // namespace nv50_ir