1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23#include "nv50/codegen/nv50_ir.h" 24#include "nv50/codegen/nv50_ir_target.h" 25 26namespace nv50_ir { 27 28const uint8_t Target::operationSrcNr[OP_LAST + 1] = 29{ 30 0, 0, // NOP, PHI 31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 32 1, 1, 2, // MOV, LOAD, STORE 33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD 34 1, 1, 1, // ABS, NEG, NOT 35 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 36 2, 2, 1, // MAX, MIN, SAT 37 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 38 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT 39 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 40 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW 41 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, 42 0, 0, 0, // PRERET,CONT,BREAK 43 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR 44 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP 45 1, 1, // EMIT, RESTART 46 1, 1, 1, // TEX, TXB, TXL, 47 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA 48 1, 2, // SULD, SUST 49 1, 1, // DFDX, DFDY 50 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 51 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR 52 0 53}; 54 55const OpClass Target::operationClass[OP_LAST + 1] = 56{ 57 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT 58 OPCLASS_OTHER, 59 OPCLASS_PSEUDO, 60 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, 61 // MOV; LOAD; STORE 62 OPCLASS_MOVE, 63 OPCLASS_LOAD, 64 OPCLASS_STORE, 65 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD 66 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 67 OPCLASS_ARITH, OPCLASS_ARITH, 68 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 69 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR 70 OPCLASS_CONVERT, OPCLASS_CONVERT, 71 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, 72 OPCLASS_SHIFT, OPCLASS_SHIFT, 73 // MAX, MIN 74 OPCLASS_COMPARE, OPCLASS_COMPARE, 75 // SAT, CEIL, FLOOR, TRUNC; CVT 76 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, 77 OPCLASS_CONVERT, 78 // SET(AND,OR,XOR); SELP, SLCT 79 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, 80 OPCLASS_COMPARE, OPCLASS_COMPARE, 81 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW 82 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 83 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 84 OPCLASS_SFU, OPCLASS_SFU, 85 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN 86 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 87 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 88 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 89 // DISCARD, EXIT 90 OPCLASS_FLOW, OPCLASS_FLOW, 91 // MEMBAR 92 OPCLASS_OTHER, 93 // VFETCH, PFETCH, EXPORT 94 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE, 95 // LINTERP, PINTERP 96 OPCLASS_SFU, OPCLASS_SFU, 97 // EMIT, RESTART 98 OPCLASS_OTHER, OPCLASS_OTHER, 99 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA 100 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 101 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 102 // SULD, SUST 103 OPCLASS_SURFACE, OPCLASS_SURFACE, 104 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP 105 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, 106 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, 107 // POPCNT, INSBF, EXTBF 108 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, 109 // TEXBAR 110 OPCLASS_OTHER, 111 OPCLASS_PSEUDO // LAST 112}; 113 114 115extern Target *getTargetNVC0(unsigned int chipset); 116extern Target *getTargetNV50(unsigned int chipset); 117 118Target *Target::create(unsigned int chipset) 119{ 120 switch (chipset & 0xf0) { 121 case 0xc0: 122 case 0xd0: 123 case 0xe0: 124 return getTargetNVC0(chipset); 125 case 0x50: 126 case 0x80: 127 case 0x90: 128 case 0xa0: 129 return getTargetNV50(chipset); 130 default: 131 ERROR("unsupported target: NV%x\n", chipset); 132 return 0; 133 } 134} 135 136void Target::destroy(Target *targ) 137{ 138 delete targ; 139} 140 141CodeEmitter::CodeEmitter(const Target *target) : targ(target) 142{ 143} 144 145void 146CodeEmitter::setCodeLocation(void *ptr, uint32_t size) 147{ 148 code = reinterpret_cast<uint32_t *>(ptr); 149 codeSize = 0; 150 codeSizeLimit = size; 151} 152 153void 154CodeEmitter::printBinary() const 155{ 156 uint32_t *bin = code - codeSize / 4; 157 INFO("program binary (%u bytes)", codeSize); 158 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { 159 if ((pos % 8) == 0) 160 INFO("\n"); 161 INFO("%08x ", bin[pos]); 162 } 163 INFO("\n"); 164} 165 166static inline uint32_t sizeToBundlesNVE4(uint32_t size) 167{ 168 return (size + 55) / 56; 169} 170 171void 172CodeEmitter::prepareEmission(Program *prog) 173{ 174 for (ArrayList::Iterator fi = prog->allFuncs.iterator(); 175 !fi.end(); fi.next()) { 176 Function *func = reinterpret_cast<Function *>(fi.get()); 177 func->binPos = prog->binSize; 178 prepareEmission(func); 179 180 // adjust sizes & positions for schedulding info: 181 if (prog->getTarget()->hasSWSched) { 182 BasicBlock *bb = NULL; 183 for (int i = 0; i < func->bbCount; ++i) { 184 bb = func->bbArray[i]; 185 const uint32_t oldPos = bb->binPos; 186 const uint32_t oldEnd = bb->binPos + bb->binSize; 187 uint32_t adjPos = oldPos + sizeToBundlesNVE4(oldPos) * 8; 188 uint32_t adjEnd = oldEnd + sizeToBundlesNVE4(oldEnd) * 8; 189 bb->binPos = adjPos; 190 bb->binSize = adjEnd - adjPos; 191 } 192 if (bb) 193 func->binSize = bb->binPos + bb->binSize; 194 } 195 196 prog->binSize += func->binSize; 197 } 198} 199 200void 201CodeEmitter::prepareEmission(Function *func) 202{ 203 func->bbCount = 0; 204 func->bbArray = new BasicBlock * [func->cfg.getSize()]; 205 206 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; 207 208 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) 209 prepareEmission(BasicBlock::get(*it)); 210} 211 212void 213CodeEmitter::prepareEmission(BasicBlock *bb) 214{ 215 Instruction *i, *next; 216 Function *func = bb->getFunction(); 217 int j; 218 unsigned int nShort; 219 220 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); 221 222 for (; j >= 0; --j) { 223 BasicBlock *in = func->bbArray[j]; 224 Instruction *exit = in->getExit(); 225 226 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { 227 in->binSize -= 8; 228 func->binSize -= 8; 229 230 for (++j; j < func->bbCount; ++j) 231 func->bbArray[j]->binPos -= 8; 232 233 in->remove(exit); 234 } 235 bb->binPos = in->binPos + in->binSize; 236 if (in->binSize) // no more no-op branches to bb 237 break; 238 } 239 func->bbArray[func->bbCount++] = bb; 240 241 if (!bb->getExit()) 242 return; 243 244 // determine encoding size, try to group short instructions 245 nShort = 0; 246 for (i = bb->getEntry(); i; i = next) { 247 next = i->next; 248 249 i->encSize = getMinEncodingSize(i); 250 if (next && i->encSize < 8) 251 ++nShort; 252 else 253 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { 254 if (i->isCommutationLegal(i->next)) { 255 bb->permuteAdjacent(i, next); 256 next->encSize = 4; 257 next = i; 258 i = i->prev; 259 ++nShort; 260 } else 261 if (i->isCommutationLegal(i->prev) && next->next) { 262 bb->permuteAdjacent(i->prev, i); 263 next->encSize = 4; 264 next = next->next; 265 bb->binSize += 4; 266 ++nShort; 267 } else { 268 i->encSize = 8; 269 i->prev->encSize = 8; 270 bb->binSize += 4; 271 nShort = 0; 272 } 273 } else { 274 i->encSize = 8; 275 if (nShort & 1) { 276 i->prev->encSize = 8; 277 bb->binSize += 4; 278 } 279 nShort = 0; 280 } 281 bb->binSize += i->encSize; 282 } 283 284 if (bb->getExit()->encSize == 4) { 285 assert(nShort); 286 bb->getExit()->encSize = 8; 287 bb->binSize += 4; 288 289 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { 290 bb->binSize += 8; 291 bb->getExit()->prev->encSize = 8; 292 } 293 } 294 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); 295 296 func->binSize += bb->binSize; 297} 298 299void 300Program::emitSymbolTable(struct nv50_ir_prog_info *info) 301{ 302 unsigned int n = 0, nMax = allFuncs.getSize(); 303 304 info->bin.syms = 305 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms)); 306 307 for (ArrayList::Iterator fi = allFuncs.iterator(); 308 !fi.end(); 309 fi.next(), ++n) { 310 Function *f = (Function *)fi.get(); 311 assert(n < nMax); 312 313 info->bin.syms[n].label = f->getLabel(); 314 info->bin.syms[n].offset = f->binPos; 315 } 316 317 info->bin.numSyms = n; 318} 319 320bool 321Program::emitBinary(struct nv50_ir_prog_info *info) 322{ 323 CodeEmitter *emit = target->getCodeEmitter(progType); 324 325 emit->prepareEmission(this); 326 327 if (dbgFlags & NV50_IR_DEBUG_BASIC) 328 this->print(); 329 330 if (!binSize) { 331 code = NULL; 332 return false; 333 } 334 code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); 335 if (!code) 336 return false; 337 emit->setCodeLocation(code, binSize); 338 339 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { 340 Function *fn = reinterpret_cast<Function *>(fi.get()); 341 342 assert(emit->getCodeSize() == fn->binPos); 343 344 for (int b = 0; b < fn->bbCount; ++b) 345 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) 346 emit->emitInstruction(i); 347 } 348 info->bin.relocData = emit->getRelocInfo(); 349 350 emitSymbolTable(info); 351 352 // the nvc0 driver will print the binary iself together with the header 353 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) 354 emit->printBinary(); 355 356 delete emit; 357 return true; 358} 359 360#define RELOC_ALLOC_INCREMENT 8 361 362bool 363CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, 364 int s) 365{ 366 unsigned int n = relocInfo ? relocInfo->count : 0; 367 368 if (!(n % RELOC_ALLOC_INCREMENT)) { 369 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); 370 relocInfo = reinterpret_cast<RelocInfo *>( 371 REALLOC(relocInfo, n ? size : 0, 372 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); 373 if (!relocInfo) 374 return false; 375 if (n == 0) 376 memset(relocInfo, 0, sizeof(RelocInfo)); 377 } 378 ++relocInfo->count; 379 380 relocInfo->entry[n].data = data; 381 relocInfo->entry[n].mask = m; 382 relocInfo->entry[n].offset = codeSize + w * 4; 383 relocInfo->entry[n].bitPos = s; 384 relocInfo->entry[n].type = ty; 385 386 return true; 387} 388 389void 390RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const 391{ 392 uint32_t value = 0; 393 394 switch (type) { 395 case TYPE_CODE: value = info->codePos; break; 396 case TYPE_BUILTIN: value = info->libPos; break; 397 case TYPE_DATA: value = info->dataPos; break; 398 default: 399 assert(0); 400 break; 401 } 402 value += data; 403 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); 404 405 binary[offset / 4] &= ~mask; 406 binary[offset / 4] |= value & mask; 407} 408 409} // namespace nv50_ir 410 411 412#include "nv50/codegen/nv50_ir_driver.h" 413 414extern "C" { 415 416void 417nv50_ir_relocate_code(void *relocData, uint32_t *code, 418 uint32_t codePos, 419 uint32_t libPos, 420 uint32_t dataPos) 421{ 422 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); 423 424 info->codePos = codePos; 425 info->libPos = libPos; 426 info->dataPos = dataPos; 427 428 for (unsigned int i = 0; i < info->count; ++i) 429 info->entry[i].apply(code, info); 430} 431 432void 433nv50_ir_get_target_library(uint32_t chipset, 434 const uint32_t **code, uint32_t *size) 435{ 436 nv50_ir::Target *targ = nv50_ir::Target::create(chipset); 437 targ->getBuiltinCode(code, size); 438 nv50_ir::Target::destroy(targ); 439} 440 441} 442