1/* 2 * Copyright 2012 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "codegen/nv50_ir_target_nvc0.h" 24 25// CodeEmitter for GK110 encoding of the Fermi/Kepler ISA. 26 27namespace nv50_ir { 28 29class CodeEmitterGK110 : public CodeEmitter 30{ 31public: 32 CodeEmitterGK110(const TargetNVC0 *); 33 34 virtual bool emitInstruction(Instruction *); 35 virtual uint32_t getMinEncodingSize(const Instruction *) const; 36 virtual void prepareEmission(Function *); 37 38 inline void setProgramType(Program::Type pType) { progType = pType; } 39 40private: 41 const TargetNVC0 *targNVC0; 42 43 Program::Type progType; 44 45 const bool writeIssueDelays; 46 47private: 48 void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1); 49 void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg); 50 void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier); 51 52 void emitPredicate(const Instruction *); 53 54 void setCAddress14(const ValueRef&); 55 void setShortImmediate(const Instruction *, const int s); 56 void setImmediate32(const Instruction *, const int s, Modifier); 57 void setSUConst16(const Instruction *, const int s); 58 59 void modNegAbsF32_3b(const Instruction *, const int s); 60 61 void emitCondCode(CondCode cc, int pos, uint8_t mask); 62 void emitInterpMode(const Instruction *); 63 void emitLoadStoreType(DataType ty, const int pos); 64 void emitCachingMode(CacheMode c, const int pos); 65 void emitSUGType(DataType, const int pos); 66 void emitSUCachingMode(CacheMode c); 67 68 inline uint8_t getSRegEncoding(const ValueRef&); 69 70 void emitRoundMode(RoundMode, const int pos, const int rintPos); 71 void emitRoundModeF(RoundMode, const int pos); 72 void emitRoundModeI(RoundMode, const int pos); 73 74 void emitNegAbs12(const Instruction *); 75 76 void emitNOP(const Instruction *); 77 78 void emitLOAD(const Instruction *); 79 void emitSTORE(const Instruction *); 80 void emitMOV(const Instruction *); 81 void emitATOM(const Instruction *); 82 void emitCCTL(const Instruction *); 83 84 void emitINTERP(const Instruction *); 85 void emitAFETCH(const Instruction *); 86 void emitPFETCH(const Instruction *); 87 void emitVFETCH(const Instruction *); 88 void emitEXPORT(const Instruction *); 89 void emitOUT(const Instruction *); 90 91 void emitUADD(const Instruction *); 92 void emitFADD(const Instruction *); 93 void emitDADD(const Instruction *); 94 void emitIMUL(const Instruction *); 95 void emitFMUL(const Instruction *); 96 void emitDMUL(const Instruction *); 97 void emitIMAD(const Instruction *); 98 void emitISAD(const Instruction *); 99 void emitSHLADD(const Instruction *); 100 void emitFMAD(const Instruction *); 101 void emitDMAD(const Instruction *); 102 void emitMADSP(const Instruction *i); 103 104 void emitNOT(const Instruction *); 105 void emitLogicOp(const Instruction *, uint8_t subOp); 106 void emitPOPC(const Instruction *); 107 void emitINSBF(const Instruction *); 108 void emitEXTBF(const Instruction *); 109 void emitBFIND(const Instruction *); 110 void emitPERMT(const Instruction *); 111 void emitShift(const Instruction *); 112 113 void emitSFnOp(const Instruction *, uint8_t subOp); 114 115 void emitCVT(const Instruction *); 116 void emitMINMAX(const Instruction *); 117 void emitPreOp(const Instruction *); 118 119 void emitSET(const CmpInstruction *); 120 void emitSLCT(const CmpInstruction *); 121 void emitSELP(const Instruction *); 122 123 void emitTEXBAR(const Instruction *); 124 void emitTEX(const TexInstruction *); 125 void emitTEXCSAA(const TexInstruction *); 126 void emitTXQ(const TexInstruction *); 127 128 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); 129 130 void emitPIXLD(const Instruction *); 131 132 void emitBAR(const Instruction *); 133 void emitMEMBAR(const Instruction *); 134 135 void emitFlow(const Instruction *); 136 137 void emitVOTE(const Instruction *); 138 139 void emitSULDGB(const TexInstruction *); 140 void emitSUSTGx(const TexInstruction *); 141 void emitSUCLAMPMode(uint16_t); 142 void emitSUCalc(Instruction *); 143 144 void emitVSHL(const Instruction *); 145 void emitVectorSubOp(const Instruction *); 146 147 inline void defId(const ValueDef&, const int pos); 148 inline void srcId(const ValueRef&, const int pos); 149 inline void srcId(const ValueRef *, const int pos); 150 inline void srcId(const Instruction *, int s, const int pos); 151 152 inline void srcAddr32(const ValueRef&, const int pos); // address / 4 153 154 inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false); 155}; 156 157#define GK110_GPR_ZERO 255 158 159#define NEG_(b, s) \ 160 if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) 161#define ABS_(b, s) \ 162 if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) 163 164#define NOT_(b, s) if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \ 165 code[(0x##b) / 32] |= 1 << ((0x##b) % 32) 166 167#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) 168#define DNZ_(b) if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) 169 170#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) 171 172#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b) 173 174#define SDATA(a) ((a).rep()->reg.data) 175#define DDATA(a) ((a).rep()->reg.data) 176 177void CodeEmitterGK110::srcId(const ValueRef& src, const int pos) 178{ 179 code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32); 180} 181 182void CodeEmitterGK110::srcId(const ValueRef *src, const int pos) 183{ 184 code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32); 185} 186 187void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos) 188{ 189 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO; 190 code[pos / 32] |= r << (pos % 32); 191} 192 193void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos) 194{ 195 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32); 196} 197 198void CodeEmitterGK110::defId(const ValueDef& def, const int pos) 199{ 200 code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32); 201} 202 203bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod) 204{ 205 const ImmediateValue *imm = ref.get()->asImm(); 206 207 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); 208} 209 210void 211CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos) 212{ 213 bool rint = false; 214 uint8_t n; 215 216 switch (rnd) { 217 case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break; 218 case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break; 219 case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break; 220 default: 221 rint = rnd == ROUND_NI; 222 n = 0; 223 assert(rnd == ROUND_N || rnd == ROUND_NI); 224 break; 225 } 226 code[pos / 32] |= n << (pos % 32); 227 if (rint && rintPos >= 0) 228 code[rintPos / 32] |= 1 << (rintPos % 32); 229} 230 231void 232CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos) 233{ 234 uint8_t n; 235 236 switch (rnd) { 237 case ROUND_M: n = 1; break; 238 case ROUND_P: n = 2; break; 239 case ROUND_Z: n = 3; break; 240 default: 241 n = 0; 242 assert(rnd == ROUND_N); 243 break; 244 } 245 code[pos / 32] |= n << (pos % 32); 246} 247 248void 249CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos) 250{ 251 uint8_t n; 252 253 switch (rnd) { 254 case ROUND_MI: n = 1; break; 255 case ROUND_PI: n = 2; break; 256 case ROUND_ZI: n = 3; break; 257 default: 258 n = 0; 259 assert(rnd == ROUND_NI); 260 break; 261 } 262 code[pos / 32] |= n << (pos % 32); 263} 264 265void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask) 266{ 267 uint8_t n; 268 269 switch (cc) { 270 case CC_FL: n = 0x00; break; 271 case CC_LT: n = 0x01; break; 272 case CC_EQ: n = 0x02; break; 273 case CC_LE: n = 0x03; break; 274 case CC_GT: n = 0x04; break; 275 case CC_NE: n = 0x05; break; 276 case CC_GE: n = 0x06; break; 277 case CC_LTU: n = 0x09; break; 278 case CC_EQU: n = 0x0a; break; 279 case CC_LEU: n = 0x0b; break; 280 case CC_GTU: n = 0x0c; break; 281 case CC_NEU: n = 0x0d; break; 282 case CC_GEU: n = 0x0e; break; 283 case CC_TR: n = 0x0f; break; 284 case CC_NO: n = 0x10; break; 285 case CC_NC: n = 0x11; break; 286 case CC_NS: n = 0x12; break; 287 case CC_NA: n = 0x13; break; 288 case CC_A: n = 0x14; break; 289 case CC_S: n = 0x15; break; 290 case CC_C: n = 0x16; break; 291 case CC_O: n = 0x17; break; 292 default: 293 n = 0; 294 assert(!"invalid condition code"); 295 break; 296 } 297 code[pos / 32] |= (n & mask) << (pos % 32); 298} 299 300void 301CodeEmitterGK110::emitPredicate(const Instruction *i) 302{ 303 if (i->predSrc >= 0) { 304 srcId(i->src(i->predSrc), 18); 305 if (i->cc == CC_NOT_P) 306 code[0] |= 8 << 18; // negate 307 assert(i->getPredicate()->reg.file == FILE_PREDICATE); 308 } else { 309 code[0] |= 7 << 18; 310 } 311} 312 313void 314CodeEmitterGK110::setCAddress14(const ValueRef& src) 315{ 316 const Storage& res = src.get()->asSym()->reg; 317 const int32_t addr = res.data.offset / 4; 318 319 code[0] |= (addr & 0x01ff) << 23; 320 code[1] |= (addr & 0x3e00) >> 9; 321 code[1] |= res.fileIndex << 5; 322} 323 324void 325CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s) 326{ 327 const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32; 328 const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64; 329 330 if (i->sType == TYPE_F32) { 331 assert(!(u32 & 0x00000fff)); 332 code[0] |= ((u32 & 0x001ff000) >> 12) << 23; 333 code[1] |= ((u32 & 0x7fe00000) >> 21); 334 code[1] |= ((u32 & 0x80000000) >> 4); 335 } else 336 if (i->sType == TYPE_F64) { 337 assert(!(u64 & 0x00000fffffffffffULL)); 338 code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23; 339 code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53); 340 code[1] |= ((u64 & 0x8000000000000000ULL) >> 36); 341 } else { 342 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); 343 code[0] |= (u32 & 0x001ff) << 23; 344 code[1] |= (u32 & 0x7fe00) >> 9; 345 code[1] |= (u32 & 0x80000) << 8; 346 } 347} 348 349void 350CodeEmitterGK110::setImmediate32(const Instruction *i, const int s, 351 Modifier mod) 352{ 353 uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32; 354 355 if (mod) { 356 ImmediateValue imm(i->getSrc(s)->asImm(), i->sType); 357 mod.applyTo(imm); 358 u32 = imm.reg.data.u32; 359 } 360 361 code[0] |= u32 << 23; 362 code[1] |= u32 >> 9; 363} 364 365void 366CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg, 367 Modifier mod) 368{ 369 code[0] = ctg; 370 code[1] = opc << 20; 371 372 emitPredicate(i); 373 374 defId(i->def(0), 2); 375 376 for (int s = 0; s < 3 && i->srcExists(s); ++s) { 377 switch (i->src(s).getFile()) { 378 case FILE_GPR: 379 srcId(i->src(s), s ? 42 : 10); 380 break; 381 case FILE_IMMEDIATE: 382 setImmediate32(i, s, mod); 383 break; 384 default: 385 break; 386 } 387 } 388} 389 390 391void 392CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg) 393{ 394 code[0] = ctg; 395 code[1] = opc << 20; 396 397 emitPredicate(i); 398 399 defId(i->def(0), 2); 400 401 switch (i->src(0).getFile()) { 402 case FILE_MEMORY_CONST: 403 code[1] |= 0x4 << 28; 404 setCAddress14(i->src(0)); 405 break; 406 case FILE_GPR: 407 code[1] |= 0xc << 28; 408 srcId(i->src(0), 23); 409 break; 410 default: 411 assert(0); 412 break; 413 } 414} 415 416// 0x2 for GPR, c[] and 0x1 for short immediate 417void 418CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2, 419 uint32_t opc1) 420{ 421 const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE; 422 423 int s1 = 23; 424 if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST) 425 s1 = 42; 426 427 if (imm) { 428 code[0] = 0x1; 429 code[1] = opc1 << 20; 430 } else { 431 code[0] = 0x2; 432 code[1] = (0xc << 28) | (opc2 << 20); 433 } 434 435 emitPredicate(i); 436 437 defId(i->def(0), 2); 438 439 for (int s = 0; s < 3 && i->srcExists(s); ++s) { 440 switch (i->src(s).getFile()) { 441 case FILE_MEMORY_CONST: 442 code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28); 443 setCAddress14(i->src(s)); 444 break; 445 case FILE_IMMEDIATE: 446 setShortImmediate(i, s); 447 break; 448 case FILE_GPR: 449 srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10); 450 break; 451 default: 452 if (i->op == OP_SELP) { 453 assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE); 454 srcId(i->src(s), 42); 455 } 456 // ignore here, can be predicate or flags, but must not be address 457 break; 458 } 459 } 460 // 0x0 = invalid 461 // 0xc = rrr 462 // 0x8 = rrc 463 // 0x4 = rcr 464 assert(imm || (code[1] & (0xc << 28))); 465} 466 467inline void 468CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s) 469{ 470 if (i->src(s).mod.abs()) code[1] &= ~(1 << 27); 471 if (i->src(s).mod.neg()) code[1] ^= (1 << 27); 472} 473 474void 475CodeEmitterGK110::emitNOP(const Instruction *i) 476{ 477 code[0] = 0x00003c02; 478 code[1] = 0x85800000; 479 480 if (i) 481 emitPredicate(i); 482 else 483 code[0] = 0x001c3c02; 484} 485 486void 487CodeEmitterGK110::emitFMAD(const Instruction *i) 488{ 489 assert(!isLIMM(i->src(1), TYPE_F32)); 490 491 emitForm_21(i, 0x0c0, 0x940); 492 493 NEG_(34, 2); 494 SAT_(35); 495 RND_(36, F); 496 FTZ_(38); 497 DNZ_(39); 498 499 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); 500 501 if (code[0] & 0x1) { 502 if (neg1) 503 code[1] ^= 1 << 27; 504 } else 505 if (neg1) { 506 code[1] |= 1 << 19; 507 } 508} 509 510void 511CodeEmitterGK110::emitDMAD(const Instruction *i) 512{ 513 assert(!i->saturate); 514 assert(!i->ftz); 515 516 emitForm_21(i, 0x1b8, 0xb38); 517 518 NEG_(34, 2); 519 RND_(36, F); 520 521 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); 522 523 if (code[0] & 0x1) { 524 if (neg1) 525 code[1] ^= 1 << 27; 526 } else 527 if (neg1) { 528 code[1] |= 1 << 19; 529 } 530} 531 532void 533CodeEmitterGK110::emitMADSP(const Instruction *i) 534{ 535 emitForm_21(i, 0x140, 0xa40); 536 537 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) { 538 code[1] |= 0x00c00000; 539 } else { 540 code[1] |= (i->subOp & 0x00f) << 19; // imadp1 541 code[1] |= (i->subOp & 0x0f0) << 20; // imadp2 542 code[1] |= (i->subOp & 0x100) << 11; // imadp3 543 code[1] |= (i->subOp & 0x200) << 15; // imadp3 544 code[1] |= (i->subOp & 0xc00) << 12; // imadp3 545 } 546 547 if (i->flagsDef >= 0) 548 code[1] |= 1 << 18; 549} 550 551void 552CodeEmitterGK110::emitFMUL(const Instruction *i) 553{ 554 bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); 555 556 assert(i->postFactor >= -3 && i->postFactor <= 3); 557 558 if (isLIMM(i->src(1), TYPE_F32)) { 559 emitForm_L(i, 0x200, 0x2, Modifier(0)); 560 561 FTZ_(38); 562 DNZ_(39); 563 SAT_(3a); 564 if (neg) 565 code[1] ^= 1 << 22; 566 567 assert(i->postFactor == 0); 568 } else { 569 emitForm_21(i, 0x234, 0xc34); 570 code[1] |= ((i->postFactor > 0) ? 571 (7 - i->postFactor) : (0 - i->postFactor)) << 12; 572 573 RND_(2a, F); 574 FTZ_(2f); 575 DNZ_(30); 576 SAT_(35); 577 578 if (code[0] & 0x1) { 579 if (neg) 580 code[1] ^= 1 << 27; 581 } else 582 if (neg) { 583 code[1] |= 1 << 19; 584 } 585 } 586} 587 588void 589CodeEmitterGK110::emitDMUL(const Instruction *i) 590{ 591 bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); 592 593 assert(!i->postFactor); 594 assert(!i->saturate); 595 assert(!i->ftz); 596 assert(!i->dnz); 597 598 emitForm_21(i, 0x240, 0xc40); 599 600 RND_(2a, F); 601 602 if (code[0] & 0x1) { 603 if (neg) 604 code[1] ^= 1 << 27; 605 } else 606 if (neg) { 607 code[1] |= 1 << 19; 608 } 609} 610 611void 612CodeEmitterGK110::emitIMUL(const Instruction *i) 613{ 614 assert(!i->src(0).mod.neg() && !i->src(1).mod.neg()); 615 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); 616 617 if (i->src(1).getFile() == FILE_IMMEDIATE) { 618 emitForm_L(i, 0x280, 2, Modifier(0)); 619 620 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) 621 code[1] |= 1 << 24; 622 if (i->sType == TYPE_S32) 623 code[1] |= 3 << 25; 624 } else { 625 emitForm_21(i, 0x21c, 0xc1c); 626 627 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) 628 code[1] |= 1 << 10; 629 if (i->sType == TYPE_S32) 630 code[1] |= 3 << 11; 631 } 632} 633 634void 635CodeEmitterGK110::emitFADD(const Instruction *i) 636{ 637 if (isLIMM(i->src(1), TYPE_F32)) { 638 assert(i->rnd == ROUND_N); 639 assert(!i->saturate); 640 641 Modifier mod = i->src(1).mod ^ 642 Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0); 643 644 emitForm_L(i, 0x400, 0, mod); 645 646 FTZ_(3a); 647 NEG_(3b, 0); 648 ABS_(39, 0); 649 } else { 650 emitForm_21(i, 0x22c, 0xc2c); 651 652 FTZ_(2f); 653 RND_(2a, F); 654 ABS_(31, 0); 655 NEG_(33, 0); 656 SAT_(35); 657 658 if (code[0] & 0x1) { 659 modNegAbsF32_3b(i, 1); 660 if (i->op == OP_SUB) code[1] ^= 1 << 27; 661 } else { 662 ABS_(34, 1); 663 NEG_(30, 1); 664 if (i->op == OP_SUB) code[1] ^= 1 << 16; 665 } 666 } 667} 668 669void 670CodeEmitterGK110::emitDADD(const Instruction *i) 671{ 672 assert(!i->saturate); 673 assert(!i->ftz); 674 675 emitForm_21(i, 0x238, 0xc38); 676 RND_(2a, F); 677 ABS_(31, 0); 678 NEG_(33, 0); 679 if (code[0] & 0x1) { 680 modNegAbsF32_3b(i, 1); 681 if (i->op == OP_SUB) code[1] ^= 1 << 27; 682 } else { 683 NEG_(30, 1); 684 ABS_(34, 1); 685 if (i->op == OP_SUB) code[1] ^= 1 << 16; 686 } 687} 688 689void 690CodeEmitterGK110::emitUADD(const Instruction *i) 691{ 692 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg(); 693 694 if (i->op == OP_SUB) 695 addOp ^= 1; 696 697 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); 698 699 if (isLIMM(i->src(1), TYPE_S32)) { 700 emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0)); 701 702 if (addOp & 2) 703 code[1] |= 1 << 27; 704 705 assert(!i->defExists(1)); 706 assert(i->flagsSrc < 0); 707 708 SAT_(39); 709 } else { 710 emitForm_21(i, 0x208, 0xc08); 711 712 assert(addOp != 3); // would be add-plus-one 713 714 code[1] |= addOp << 19; 715 716 if (i->defExists(1)) 717 code[1] |= 1 << 18; // write carry 718 if (i->flagsSrc >= 0) 719 code[1] |= 1 << 14; // add carry 720 721 SAT_(35); 722 } 723} 724 725void 726CodeEmitterGK110::emitIMAD(const Instruction *i) 727{ 728 uint8_t addOp = 729 i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1); 730 731 emitForm_21(i, 0x100, 0xa00); 732 733 assert(addOp != 3); 734 code[1] |= addOp << 26; 735 736 if (i->sType == TYPE_S32) 737 code[1] |= (1 << 19) | (1 << 24); 738 739 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) 740 code[1] |= 1 << 25; 741 742 if (i->flagsDef >= 0) code[1] |= 1 << 18; 743 if (i->flagsSrc >= 0) code[1] |= 1 << 20; 744 745 SAT_(35); 746} 747 748void 749CodeEmitterGK110::emitISAD(const Instruction *i) 750{ 751 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); 752 753 emitForm_21(i, 0x1f4, 0xb74); 754 755 if (i->dType == TYPE_S32) 756 code[1] |= 1 << 19; 757} 758 759void 760CodeEmitterGK110::emitSHLADD(const Instruction *i) 761{ 762 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg(); 763 const ImmediateValue *imm = i->src(1).get()->asImm(); 764 assert(imm); 765 766 if (i->src(2).getFile() == FILE_IMMEDIATE) { 767 code[0] = 0x1; 768 code[1] = 0xc0c << 20; 769 } else { 770 code[0] = 0x2; 771 code[1] = 0x20c << 20; 772 } 773 code[1] |= addOp << 19; 774 775 emitPredicate(i); 776 777 defId(i->def(0), 2); 778 srcId(i->src(0), 10); 779 780 if (i->flagsDef >= 0) 781 code[1] |= 1 << 18; 782 783 assert(!(imm->reg.data.u32 & 0xffffffe0)); 784 code[1] |= imm->reg.data.u32 << 10; 785 786 switch (i->src(2).getFile()) { 787 case FILE_GPR: 788 assert(code[0] & 0x2); 789 code[1] |= 0xc << 28; 790 srcId(i->src(2), 23); 791 break; 792 case FILE_MEMORY_CONST: 793 assert(code[0] & 0x2); 794 code[1] |= 0x4 << 28; 795 setCAddress14(i->src(2)); 796 break; 797 case FILE_IMMEDIATE: 798 assert(code[0] & 0x1); 799 setShortImmediate(i, 2); 800 break; 801 default: 802 assert(!"bad src2 file"); 803 break; 804 } 805} 806 807void 808CodeEmitterGK110::emitNOT(const Instruction *i) 809{ 810 code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src 811 code[1] = 0x22003800; 812 813 emitPredicate(i); 814 815 defId(i->def(0), 2); 816 817 switch (i->src(0).getFile()) { 818 case FILE_GPR: 819 code[1] |= 0xc << 28; 820 srcId(i->src(0), 23); 821 break; 822 case FILE_MEMORY_CONST: 823 code[1] |= 0x4 << 28; 824 setCAddress14(i->src(0)); 825 break; 826 default: 827 assert(0); 828 break; 829 } 830} 831 832void 833CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp) 834{ 835 if (i->def(0).getFile() == FILE_PREDICATE) { 836 code[0] = 0x00000002 | (subOp << 27); 837 code[1] = 0x84800000; 838 839 emitPredicate(i); 840 841 defId(i->def(0), 5); 842 srcId(i->src(0), 14); 843 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17; 844 srcId(i->src(1), 32); 845 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3; 846 847 if (i->defExists(1)) { 848 defId(i->def(1), 2); 849 } else { 850 code[0] |= 7 << 2; 851 } 852 // (a OP b) OP c 853 if (i->predSrc != 2 && i->srcExists(2)) { 854 code[1] |= subOp << 16; 855 srcId(i->src(2), 42); 856 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13; 857 } else { 858 code[1] |= 7 << 10; 859 } 860 } else 861 if (isLIMM(i->src(1), TYPE_S32)) { 862 emitForm_L(i, 0x200, 0, i->src(1).mod); 863 code[1] |= subOp << 24; 864 NOT_(3a, 0); 865 } else { 866 emitForm_21(i, 0x220, 0xc20); 867 code[1] |= subOp << 12; 868 NOT_(2a, 0); 869 NOT_(2b, 1); 870 } 871} 872 873void 874CodeEmitterGK110::emitPOPC(const Instruction *i) 875{ 876 assert(!isLIMM(i->src(1), TYPE_S32, true)); 877 878 emitForm_21(i, 0x204, 0xc04); 879 880 NOT_(2a, 0); 881 if (!(code[0] & 0x1)) 882 NOT_(2b, 1); 883} 884 885void 886CodeEmitterGK110::emitINSBF(const Instruction *i) 887{ 888 emitForm_21(i, 0x1f8, 0xb78); 889} 890 891void 892CodeEmitterGK110::emitEXTBF(const Instruction *i) 893{ 894 emitForm_21(i, 0x600, 0xc00); 895 896 if (i->dType == TYPE_S32) 897 code[1] |= 0x80000; 898 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) 899 code[1] |= 0x800; 900} 901 902void 903CodeEmitterGK110::emitBFIND(const Instruction *i) 904{ 905 emitForm_C(i, 0x218, 0x2); 906 907 if (i->dType == TYPE_S32) 908 code[1] |= 0x80000; 909 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) 910 code[1] |= 0x800; 911 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) 912 code[1] |= 0x1000; 913} 914 915void 916CodeEmitterGK110::emitPERMT(const Instruction *i) 917{ 918 emitForm_21(i, 0x1e0, 0xb60); 919 920 code[1] |= i->subOp << 19; 921} 922 923void 924CodeEmitterGK110::emitShift(const Instruction *i) 925{ 926 if (i->op == OP_SHR) { 927 emitForm_21(i, 0x214, 0xc14); 928 if (isSignedType(i->dType)) 929 code[1] |= 1 << 19; 930 } else { 931 emitForm_21(i, 0x224, 0xc24); 932 } 933 934 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) 935 code[1] |= 1 << 10; 936} 937 938void 939CodeEmitterGK110::emitPreOp(const Instruction *i) 940{ 941 emitForm_C(i, 0x248, 0x2); 942 943 if (i->op == OP_PREEX2) 944 code[1] |= 1 << 10; 945 946 NEG_(30, 0); 947 ABS_(34, 0); 948} 949 950void 951CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp) 952{ 953 code[0] = 0x00000002 | (subOp << 23); 954 code[1] = 0x84000000; 955 956 emitPredicate(i); 957 958 defId(i->def(0), 2); 959 srcId(i->src(0), 10); 960 961 NEG_(33, 0); 962 ABS_(31, 0); 963 SAT_(35); 964} 965 966void 967CodeEmitterGK110::emitMINMAX(const Instruction *i) 968{ 969 uint32_t op2, op1; 970 971 switch (i->dType) { 972 case TYPE_U32: 973 case TYPE_S32: 974 op2 = 0x210; 975 op1 = 0xc10; 976 break; 977 case TYPE_F32: 978 op2 = 0x230; 979 op1 = 0xc30; 980 break; 981 case TYPE_F64: 982 op2 = 0x228; 983 op1 = 0xc28; 984 break; 985 default: 986 assert(0); 987 op2 = 0; 988 op1 = 0; 989 break; 990 } 991 emitForm_21(i, op2, op1); 992 993 if (i->dType == TYPE_S32) 994 code[1] |= 1 << 19; 995 code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt 996 997 FTZ_(2f); 998 ABS_(31, 0); 999 NEG_(33, 0); 1000 if (code[0] & 0x1) { 1001 modNegAbsF32_3b(i, 1); 1002 } else { 1003 ABS_(34, 1); 1004 NEG_(30, 1); 1005 } 1006} 1007 1008void 1009CodeEmitterGK110::emitCVT(const Instruction *i) 1010{ 1011 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); 1012 const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType); 1013 const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType); 1014 1015 bool sat = i->saturate; 1016 bool abs = i->src(0).mod.abs(); 1017 bool neg = i->src(0).mod.neg(); 1018 1019 RoundMode rnd = i->rnd; 1020 1021 switch (i->op) { 1022 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; 1023 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; 1024 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; 1025 case OP_SAT: sat = true; break; 1026 case OP_NEG: neg = !neg; break; 1027 case OP_ABS: abs = true; neg = false; break; 1028 default: 1029 break; 1030 } 1031 1032 DataType dType; 1033 1034 if (i->op == OP_NEG && i->dType == TYPE_U32) 1035 dType = TYPE_S32; 1036 else 1037 dType = i->dType; 1038 1039 1040 uint32_t op; 1041 1042 if (f2f) op = 0x254; 1043 else if (f2i) op = 0x258; 1044 else if (i2f) op = 0x25c; 1045 else op = 0x260; 1046 1047 emitForm_C(i, op, 0x2); 1048 1049 FTZ_(2f); 1050 if (neg) code[1] |= 1 << 16; 1051 if (abs) code[1] |= 1 << 20; 1052 if (sat) code[1] |= 1 << 21; 1053 1054 emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1); 1055 1056 code[0] |= typeSizeofLog2(dType) << 10; 1057 code[0] |= typeSizeofLog2(i->sType) << 12; 1058 code[1] |= i->subOp << 12; 1059 1060 if (isSignedIntType(dType)) 1061 code[0] |= 0x4000; 1062 if (isSignedIntType(i->sType)) 1063 code[0] |= 0x8000; 1064} 1065 1066void 1067CodeEmitterGK110::emitSET(const CmpInstruction *i) 1068{ 1069 uint16_t op1, op2; 1070 1071 if (i->def(0).getFile() == FILE_PREDICATE) { 1072 switch (i->sType) { 1073 case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break; 1074 case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break; 1075 default: 1076 op2 = 0x1b0; 1077 op1 = 0xb30; 1078 break; 1079 } 1080 emitForm_21(i, op2, op1); 1081 1082 NEG_(2e, 0); 1083 ABS_(9, 0); 1084 if (!(code[0] & 0x1)) { 1085 NEG_(8, 1); 1086 ABS_(2f, 1); 1087 } else { 1088 modNegAbsF32_3b(i, 1); 1089 } 1090 FTZ_(32); 1091 1092 // normal DST field is negated predicate result 1093 code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0); 1094 if (i->defExists(1)) 1095 defId(i->def(1), 2); 1096 else 1097 code[0] |= 0x1c; 1098 } else { 1099 switch (i->sType) { 1100 case TYPE_F32: op2 = 0x000; op1 = 0x800; break; 1101 case TYPE_F64: op2 = 0x080; op1 = 0x900; break; 1102 default: 1103 op2 = 0x1a8; 1104 op1 = 0xb28; 1105 break; 1106 } 1107 emitForm_21(i, op2, op1); 1108 1109 NEG_(2e, 0); 1110 ABS_(39, 0); 1111 if (!(code[0] & 0x1)) { 1112 NEG_(38, 1); 1113 ABS_(2f, 1); 1114 } else { 1115 modNegAbsF32_3b(i, 1); 1116 } 1117 FTZ_(3a); 1118 1119 if (i->dType == TYPE_F32) { 1120 if (isFloatType(i->sType)) 1121 code[1] |= 1 << 23; 1122 else 1123 code[1] |= 1 << 15; 1124 } 1125 } 1126 if (i->sType == TYPE_S32) 1127 code[1] |= 1 << 19; 1128 1129 if (i->op != OP_SET) { 1130 switch (i->op) { 1131 case OP_SET_AND: code[1] |= 0x0 << 16; break; 1132 case OP_SET_OR: code[1] |= 0x1 << 16; break; 1133 case OP_SET_XOR: code[1] |= 0x2 << 16; break; 1134 default: 1135 assert(0); 1136 break; 1137 } 1138 srcId(i->src(2), 0x2a); 1139 } else { 1140 code[1] |= 0x7 << 10; 1141 } 1142 emitCondCode(i->setCond, 1143 isFloatType(i->sType) ? 0x33 : 0x34, 1144 isFloatType(i->sType) ? 0xf : 0x7); 1145} 1146 1147void 1148CodeEmitterGK110::emitSLCT(const CmpInstruction *i) 1149{ 1150 CondCode cc = i->setCond; 1151 if (i->src(2).mod.neg()) 1152 cc = reverseCondCode(cc); 1153 1154 if (i->dType == TYPE_F32) { 1155 emitForm_21(i, 0x1d0, 0xb50); 1156 FTZ_(32); 1157 emitCondCode(cc, 0x33, 0xf); 1158 } else { 1159 emitForm_21(i, 0x1a0, 0xb20); 1160 emitCondCode(cc, 0x34, 0x7); 1161 } 1162} 1163 1164static void 1165selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) 1166{ 1167 int loc = entry->loc; 1168 if (data.force_persample_interp) 1169 code[loc + 1] |= 1 << 13; 1170 else 1171 code[loc + 1] &= ~(1 << 13); 1172} 1173 1174void CodeEmitterGK110::emitSELP(const Instruction *i) 1175{ 1176 emitForm_21(i, 0x250, 0x050); 1177 1178 if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) 1179 code[1] |= 1 << 13; 1180 1181 if (i->subOp == 1) { 1182 addInterp(0, 0, selpFlip); 1183 } 1184} 1185 1186void CodeEmitterGK110::emitTEXBAR(const Instruction *i) 1187{ 1188 code[0] = 0x0000003e | (i->subOp << 23); 1189 code[1] = 0x77000000; 1190 1191 emitPredicate(i); 1192} 1193 1194void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i) 1195{ 1196 code[0] = 0x00000002; 1197 code[1] = 0x76c00000; 1198 1199 code[1] |= i->tex.r << 9; 1200 // code[1] |= i->tex.s << (9 + 8); 1201 1202 if (i->tex.liveOnly) 1203 code[0] |= 0x80000000; 1204 1205 defId(i->def(0), 2); 1206 srcId(i->src(0), 10); 1207} 1208 1209static inline bool 1210isNextIndependentTex(const TexInstruction *i) 1211{ 1212 if (!i->next || !isTextureOp(i->next->op)) 1213 return false; 1214 if (i->getDef(0)->interfers(i->next->getSrc(0))) 1215 return false; 1216 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1)); 1217} 1218 1219void 1220CodeEmitterGK110::emitTEX(const TexInstruction *i) 1221{ 1222 const bool ind = i->tex.rIndirectSrc >= 0; 1223 1224 if (ind) { 1225 code[0] = 0x00000002; 1226 switch (i->op) { 1227 case OP_TXD: 1228 code[1] = 0x7e000000; 1229 break; 1230 case OP_TXLQ: 1231 code[1] = 0x7e800000; 1232 break; 1233 case OP_TXF: 1234 code[1] = 0x78000000; 1235 break; 1236 case OP_TXG: 1237 code[1] = 0x7dc00000; 1238 break; 1239 default: 1240 code[1] = 0x7d800000; 1241 break; 1242 } 1243 } else { 1244 switch (i->op) { 1245 case OP_TXD: 1246 code[0] = 0x00000002; 1247 code[1] = 0x76000000; 1248 code[1] |= i->tex.r << 9; 1249 break; 1250 case OP_TXLQ: 1251 code[0] = 0x00000002; 1252 code[1] = 0x76800000; 1253 code[1] |= i->tex.r << 9; 1254 break; 1255 case OP_TXF: 1256 code[0] = 0x00000002; 1257 code[1] = 0x70000000; 1258 code[1] |= i->tex.r << 13; 1259 break; 1260 case OP_TXG: 1261 code[0] = 0x00000001; 1262 code[1] = 0x70000000; 1263 code[1] |= i->tex.r << 15; 1264 break; 1265 default: 1266 code[0] = 0x00000001; 1267 code[1] = 0x60000000; 1268 code[1] |= i->tex.r << 15; 1269 break; 1270 } 1271 } 1272 1273 code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode 1274 1275 if (i->tex.liveOnly) 1276 code[0] |= 0x80000000; 1277 1278 switch (i->op) { 1279 case OP_TEX: break; 1280 case OP_TXB: code[1] |= 0x2000; break; 1281 case OP_TXL: code[1] |= 0x3000; break; 1282 case OP_TXF: break; 1283 case OP_TXG: break; 1284 case OP_TXD: break; 1285 case OP_TXLQ: break; 1286 default: 1287 assert(!"invalid texture op"); 1288 break; 1289 } 1290 1291 if (i->op == OP_TXF) { 1292 if (!i->tex.levelZero) 1293 code[1] |= 0x1000; 1294 } else 1295 if (i->tex.levelZero) { 1296 code[1] |= 0x1000; 1297 } 1298 1299 if (i->op != OP_TXD && i->tex.derivAll) 1300 code[1] |= 0x200; 1301 1302 emitPredicate(i); 1303 1304 code[1] |= i->tex.mask << 2; 1305 1306 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) 1307 1308 defId(i->def(0), 2); 1309 srcId(i->src(0), 10); 1310 srcId(i, src1, 23); 1311 1312 if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13; 1313 1314 // texture target: 1315 code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7; 1316 if (i->tex.target.isArray()) 1317 code[1] |= 0x40; 1318 if (i->tex.target.isShadow()) 1319 code[1] |= 0x400; 1320 if (i->tex.target == TEX_TARGET_2D_MS || 1321 i->tex.target == TEX_TARGET_2D_MS_ARRAY) 1322 code[1] |= 0x800; 1323 1324 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { 1325 // ? 1326 } 1327 1328 if (i->tex.useOffsets == 1) { 1329 switch (i->op) { 1330 case OP_TXF: code[1] |= 0x200; break; 1331 case OP_TXD: code[1] |= 0x00400000; break; 1332 default: code[1] |= 0x800; break; 1333 } 1334 } 1335 if (i->tex.useOffsets == 4) 1336 code[1] |= 0x1000; 1337} 1338 1339void 1340CodeEmitterGK110::emitTXQ(const TexInstruction *i) 1341{ 1342 code[0] = 0x00000002; 1343 code[1] = 0x75400001; 1344 1345 switch (i->tex.query) { 1346 case TXQ_DIMS: code[0] |= 0x01 << 25; break; 1347 case TXQ_TYPE: code[0] |= 0x02 << 25; break; 1348 case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break; 1349 case TXQ_FILTER: code[0] |= 0x10 << 25; break; 1350 case TXQ_LOD: code[0] |= 0x12 << 25; break; 1351 case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break; 1352 default: 1353 assert(!"invalid texture query"); 1354 break; 1355 } 1356 1357 code[1] |= i->tex.mask << 2; 1358 code[1] |= i->tex.r << 9; 1359 if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0) 1360 code[1] |= 0x08000000; 1361 1362 defId(i->def(0), 2); 1363 srcId(i->src(0), 10); 1364 1365 emitPredicate(i); 1366} 1367 1368void 1369CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) 1370{ 1371 code[0] = 0x00000002 | ((qOp & 1) << 31); 1372 code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall 1373 1374 defId(i->def(0), 2); 1375 srcId(i->src(0), 10); 1376 srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23); 1377 1378 emitPredicate(i); 1379} 1380 1381void 1382CodeEmitterGK110::emitPIXLD(const Instruction *i) 1383{ 1384 emitForm_L(i, 0x7f4, 2, Modifier(0)); 1385 code[1] |= i->subOp << 2; 1386 code[1] |= 0x00070000; 1387} 1388 1389void 1390CodeEmitterGK110::emitBAR(const Instruction *i) 1391{ 1392 code[0] = 0x00000002; 1393 code[1] = 0x85400000; 1394 1395 switch (i->subOp) { 1396 case NV50_IR_SUBOP_BAR_ARRIVE: code[1] |= 0x08; break; 1397 case NV50_IR_SUBOP_BAR_RED_AND: code[1] |= 0x50; break; 1398 case NV50_IR_SUBOP_BAR_RED_OR: code[1] |= 0x90; break; 1399 case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break; 1400 default: 1401 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC); 1402 break; 1403 } 1404 1405 emitPredicate(i); 1406 1407 // barrier id 1408 if (i->src(0).getFile() == FILE_GPR) { 1409 srcId(i->src(0), 10); 1410 } else { 1411 ImmediateValue *imm = i->getSrc(0)->asImm(); 1412 assert(imm); 1413 code[0] |= imm->reg.data.u32 << 10; 1414 code[1] |= 0x8000; 1415 } 1416 1417 // thread count 1418 if (i->src(1).getFile() == FILE_GPR) { 1419 srcId(i->src(1), 23); 1420 } else { 1421 ImmediateValue *imm = i->getSrc(0)->asImm(); 1422 assert(imm); 1423 assert(imm->reg.data.u32 <= 0xfff); 1424 code[0] |= imm->reg.data.u32 << 23; 1425 code[1] |= imm->reg.data.u32 >> 9; 1426 code[1] |= 0x4000; 1427 } 1428 1429 if (i->srcExists(2) && (i->predSrc != 2)) { 1430 srcId(i->src(2), 32 + 10); 1431 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) 1432 code[1] |= 1 << 13; 1433 } else { 1434 code[1] |= 7 << 10; 1435 } 1436} 1437 1438void CodeEmitterGK110::emitMEMBAR(const Instruction *i) 1439{ 1440 code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8; 1441 code[1] = 0x7cc00000; 1442 1443 emitPredicate(i); 1444} 1445 1446void 1447CodeEmitterGK110::emitFlow(const Instruction *i) 1448{ 1449 const FlowInstruction *f = i->asFlow(); 1450 1451 unsigned mask; // bit 0: predicate, bit 1: target 1452 1453 code[0] = 0x00000000; 1454 1455 switch (i->op) { 1456 case OP_BRA: 1457 code[1] = f->absolute ? 0x10800000 : 0x12000000; 1458 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) 1459 code[0] |= 0x80; 1460 mask = 3; 1461 break; 1462 case OP_CALL: 1463 code[1] = f->absolute ? 0x11000000 : 0x13000000; 1464 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) 1465 code[0] |= 0x80; 1466 mask = 2; 1467 break; 1468 1469 case OP_EXIT: code[1] = 0x18000000; mask = 1; break; 1470 case OP_RET: code[1] = 0x19000000; mask = 1; break; 1471 case OP_DISCARD: code[1] = 0x19800000; mask = 1; break; 1472 case OP_BREAK: code[1] = 0x1a000000; mask = 1; break; 1473 case OP_CONT: code[1] = 0x1a800000; mask = 1; break; 1474 1475 case OP_JOINAT: code[1] = 0x14800000; mask = 2; break; 1476 case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break; 1477 case OP_PRECONT: code[1] = 0x15800000; mask = 2; break; 1478 case OP_PRERET: code[1] = 0x13800000; mask = 2; break; 1479 1480 case OP_QUADON: code[1] = 0x1b800000; mask = 0; break; 1481 case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break; 1482 case OP_BRKPT: code[1] = 0x00000000; mask = 0; break; 1483 default: 1484 assert(!"invalid flow operation"); 1485 return; 1486 } 1487 1488 if (mask & 1) { 1489 emitPredicate(i); 1490 if (i->flagsSrc < 0) 1491 code[0] |= 0x3c; 1492 } 1493 1494 if (!f) 1495 return; 1496 1497 if (f->allWarp) 1498 code[0] |= 1 << 9; 1499 if (f->limit) 1500 code[0] |= 1 << 8; 1501 1502 if (f->op == OP_CALL) { 1503 if (f->builtin) { 1504 assert(f->absolute); 1505 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin); 1506 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23); 1507 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9); 1508 } else { 1509 assert(!f->absolute); 1510 int32_t pcRel = f->target.fn->binPos - (codeSize + 8); 1511 code[0] |= (pcRel & 0x1ff) << 23; 1512 code[1] |= (pcRel >> 9) & 0x7fff; 1513 } 1514 } else 1515 if (mask & 2) { 1516 int32_t pcRel = f->target.bb->binPos - (codeSize + 8); 1517 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f)) 1518 pcRel += 8; 1519 // currently we don't want absolute branches 1520 assert(!f->absolute); 1521 code[0] |= (pcRel & 0x1ff) << 23; 1522 code[1] |= (pcRel >> 9) & 0x7fff; 1523 } 1524} 1525 1526void 1527CodeEmitterGK110::emitVOTE(const Instruction *i) 1528{ 1529 assert(i->src(0).getFile() == FILE_PREDICATE); 1530 1531 code[0] = 0x00000002; 1532 code[1] = 0x86c00000 | (i->subOp << 19); 1533 1534 emitPredicate(i); 1535 1536 unsigned rp = 0; 1537 for (int d = 0; i->defExists(d); d++) { 1538 if (i->def(d).getFile() == FILE_PREDICATE) { 1539 assert(!(rp & 2)); 1540 rp |= 2; 1541 defId(i->def(d), 48); 1542 } else if (i->def(d).getFile() == FILE_GPR) { 1543 assert(!(rp & 1)); 1544 rp |= 1; 1545 defId(i->def(d), 2); 1546 } else { 1547 assert(!"Unhandled def"); 1548 } 1549 } 1550 if (!(rp & 1)) 1551 code[0] |= 255 << 2; 1552 if (!(rp & 2)) 1553 code[1] |= 7 << 16; 1554 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) 1555 code[1] |= 1 << 13; 1556 srcId(i->src(0), 42); 1557} 1558 1559void 1560CodeEmitterGK110::emitSUGType(DataType ty, const int pos) 1561{ 1562 uint8_t n = 0; 1563 1564 switch (ty) { 1565 case TYPE_S32: n = 1; break; 1566 case TYPE_U8: n = 2; break; 1567 case TYPE_S8: n = 3; break; 1568 default: 1569 assert(ty == TYPE_U32); 1570 break; 1571 } 1572 code[pos / 32] |= n << (pos % 32); 1573} 1574 1575void 1576CodeEmitterGK110::emitSUCachingMode(CacheMode c) 1577{ 1578 uint8_t n = 0; 1579 1580 switch (c) { 1581 case CACHE_CA: 1582// case CACHE_WB: 1583 n = 0; 1584 break; 1585 case CACHE_CG: 1586 n = 1; 1587 break; 1588 case CACHE_CS: 1589 n = 2; 1590 break; 1591 case CACHE_CV: 1592// case CACHE_WT: 1593 n = 3; 1594 break; 1595 default: 1596 assert(!"invalid caching mode"); 1597 break; 1598 } 1599 code[0] |= (n & 1) << 31; 1600 code[1] |= (n & 2) >> 1; 1601} 1602 1603void 1604CodeEmitterGK110::setSUConst16(const Instruction *i, const int s) 1605{ 1606 const uint32_t offset = i->getSrc(s)->reg.data.offset; 1607 1608 assert(offset == (offset & 0xfffc)); 1609 1610 code[0] |= offset << 21; 1611 code[1] |= offset >> 11; 1612 code[1] |= i->getSrc(s)->reg.fileIndex << 5; 1613} 1614 1615void 1616CodeEmitterGK110::emitSULDGB(const TexInstruction *i) 1617{ 1618 code[0] = 0x00000002; 1619 code[1] = 0x30000000 | (i->subOp << 14); 1620 1621 if (i->src(1).getFile() == FILE_MEMORY_CONST) { 1622 emitLoadStoreType(i->dType, 0x38); 1623 emitCachingMode(i->cache, 0x36); 1624 1625 // format 1626 setSUConst16(i, 1); 1627 } else { 1628 assert(i->src(1).getFile() == FILE_GPR); 1629 code[1] |= 0x49800000; 1630 1631 emitLoadStoreType(i->dType, 0x21); 1632 emitSUCachingMode(i->cache); 1633 1634 srcId(i->src(1), 23); 1635 } 1636 1637 emitSUGType(i->sType, 0x34); 1638 1639 emitPredicate(i); 1640 defId(i->def(0), 2); // destination 1641 srcId(i->src(0), 10); // address 1642 1643 // surface predicate 1644 if (!i->srcExists(2) || (i->predSrc == 2)) { 1645 code[1] |= 0x7 << 10; 1646 } else { 1647 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) 1648 code[1] |= 1 << 13; 1649 srcId(i->src(2), 32 + 10); 1650 } 1651} 1652 1653void 1654CodeEmitterGK110::emitSUSTGx(const TexInstruction *i) 1655{ 1656 assert(i->op == OP_SUSTP); 1657 1658 code[0] = 0x00000002; 1659 code[1] = 0x38000000; 1660 1661 if (i->src(1).getFile() == FILE_MEMORY_CONST) { 1662 code[0] |= i->subOp << 2; 1663 1664 if (i->op == OP_SUSTP) 1665 code[0] |= i->tex.mask << 4; 1666 1667 emitSUGType(i->sType, 0x8); 1668 emitCachingMode(i->cache, 0x36); 1669 1670 // format 1671 setSUConst16(i, 1); 1672 } else { 1673 assert(i->src(1).getFile() == FILE_GPR); 1674 1675 code[0] |= i->subOp << 23; 1676 code[1] |= 0x41c00000; 1677 1678 if (i->op == OP_SUSTP) 1679 code[0] |= i->tex.mask << 25; 1680 1681 emitSUGType(i->sType, 0x1d); 1682 emitSUCachingMode(i->cache); 1683 1684 srcId(i->src(1), 2); 1685 } 1686 1687 emitPredicate(i); 1688 srcId(i->src(0), 10); // address 1689 srcId(i->src(3), 42); // values 1690 1691 // surface predicate 1692 if (!i->srcExists(2) || (i->predSrc == 2)) { 1693 code[1] |= 0x7 << 18; 1694 } else { 1695 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) 1696 code[1] |= 1 << 21; 1697 srcId(i->src(2), 32 + 18); 1698 } 1699} 1700 1701void 1702CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp) 1703{ 1704 uint8_t m; 1705 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) { 1706 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break; 1707 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break; 1708 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break; 1709 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break; 1710 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break; 1711 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break; 1712 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break; 1713 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break; 1714 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break; 1715 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break; 1716 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break; 1717 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break; 1718 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break; 1719 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break; 1720 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break; 1721 default: 1722 return; 1723 } 1724 code[1] |= m << 20; 1725 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D) 1726 code[1] |= 1 << 24; 1727} 1728 1729void 1730CodeEmitterGK110::emitSUCalc(Instruction *i) 1731{ 1732 ImmediateValue *imm = NULL; 1733 uint64_t opc1, opc2; 1734 1735 if (i->srcExists(2)) { 1736 imm = i->getSrc(2)->asImm(); 1737 if (imm) 1738 i->setSrc(2, NULL); // special case, make emitForm_21 not assert 1739 } 1740 1741 switch (i->op) { 1742 case OP_SUCLAMP: opc1 = 0xb00; opc2 = 0x580; break; 1743 case OP_SUBFM: opc1 = 0xb68; opc2 = 0x1e8; break; 1744 case OP_SUEAU: opc1 = 0xb6c; opc2 = 0x1ec; break; 1745 default: 1746 assert(0); 1747 return; 1748 } 1749 emitForm_21(i, opc2, opc1); 1750 1751 if (i->op == OP_SUCLAMP) { 1752 if (i->dType == TYPE_S32) 1753 code[1] |= 1 << 19; 1754 emitSUCLAMPMode(i->subOp); 1755 } 1756 1757 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D) 1758 code[1] |= 1 << 18; 1759 1760 if (i->op != OP_SUEAU) { 1761 const uint8_t pos = i->op == OP_SUBFM ? 19 : 16; 1762 if (i->def(0).getFile() == FILE_PREDICATE) { // p, # 1763 code[0] |= 255 << 2; 1764 code[1] |= i->getDef(1)->reg.data.id << pos; 1765 } else 1766 if (i->defExists(1)) { // r, p 1767 assert(i->def(1).getFile() == FILE_PREDICATE); 1768 code[1] |= i->getDef(1)->reg.data.id << pos; 1769 } else { // r, # 1770 code[1] |= 7 << pos; 1771 } 1772 } 1773 1774 if (imm) { 1775 assert(i->op == OP_SUCLAMP); 1776 i->setSrc(2, imm); 1777 code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint6 1778 } 1779} 1780 1781 1782void 1783CodeEmitterGK110::emitVectorSubOp(const Instruction *i) 1784{ 1785 switch (NV50_IR_SUBOP_Vn(i->subOp)) { 1786 case 0: 1787 code[1] |= (i->subOp & 0x000f) << 7; // vsrc1 1788 code[1] |= (i->subOp & 0x00e0) >> 6; // vsrc2 1789 code[1] |= (i->subOp & 0x0100) << 13; // vsrc2 1790 code[1] |= (i->subOp & 0x3c00) << 12; // vdst 1791 break; 1792 default: 1793 assert(0); 1794 break; 1795 } 1796} 1797 1798void 1799CodeEmitterGK110::emitVSHL(const Instruction *i) 1800{ 1801 code[0] = 0x00000002; 1802 code[1] = 0xb8000000; 1803 1804 assert(NV50_IR_SUBOP_Vn(i->subOp) == 0); 1805 1806 if (isSignedType(i->dType)) code[1] |= 1 << 25; 1807 if (isSignedType(i->sType)) code[1] |= 1 << 19; 1808 1809 emitVectorSubOp(i); 1810 1811 emitPredicate(i); 1812 defId(i->def(0), 2); 1813 srcId(i->src(0), 10); 1814 1815 if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) { 1816 ImmediateValue *imm = i->getSrc(1)->asImm(); 1817 assert(imm); 1818 code[0] |= (imm->reg.data.u32 & 0x01ff) << 23; 1819 code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9; 1820 } else { 1821 assert(i->getSrc(1)->reg.file == FILE_GPR); 1822 code[1] |= 1 << 21; 1823 srcId(i->src(1), 23); 1824 } 1825 srcId(i->src(2), 42); 1826 1827 if (i->saturate) 1828 code[0] |= 1 << 22; 1829 if (i->flagsDef >= 0) 1830 code[1] |= 1 << 18; 1831} 1832 1833void 1834CodeEmitterGK110::emitAFETCH(const Instruction *i) 1835{ 1836 uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff; 1837 1838 code[0] = 0x00000002 | (offset << 23); 1839 code[1] = 0x7d000000 | (offset >> 9); 1840 1841 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) 1842 code[1] |= 0x8; 1843 1844 emitPredicate(i); 1845 1846 defId(i->def(0), 2); 1847 srcId(i->src(0).getIndirect(0), 10); 1848} 1849 1850void 1851CodeEmitterGK110::emitPFETCH(const Instruction *i) 1852{ 1853 uint32_t prim = i->src(0).get()->reg.data.u32; 1854 1855 code[0] = 0x00000002 | ((prim & 0xff) << 23); 1856 code[1] = 0x7f800000; 1857 1858 emitPredicate(i); 1859 1860 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) 1861 1862 defId(i->def(0), 2); 1863 srcId(i, src1, 10); 1864} 1865 1866void 1867CodeEmitterGK110::emitVFETCH(const Instruction *i) 1868{ 1869 unsigned int size = typeSizeof(i->dType); 1870 uint32_t offset = i->src(0).get()->reg.data.offset; 1871 1872 code[0] = 0x00000002 | (offset << 23); 1873 code[1] = 0x7ec00000 | (offset >> 9); 1874 code[1] |= (size / 4 - 1) << 18; 1875 1876 if (i->perPatch) 1877 code[1] |= 0x4; 1878 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) 1879 code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads 1880 1881 emitPredicate(i); 1882 1883 defId(i->def(0), 2); 1884 srcId(i->src(0).getIndirect(0), 10); 1885 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address 1886} 1887 1888void 1889CodeEmitterGK110::emitEXPORT(const Instruction *i) 1890{ 1891 unsigned int size = typeSizeof(i->dType); 1892 uint32_t offset = i->src(0).get()->reg.data.offset; 1893 1894 code[0] = 0x00000002 | (offset << 23); 1895 code[1] = 0x7f000000 | (offset >> 9); 1896 code[1] |= (size / 4 - 1) << 18; 1897 1898 if (i->perPatch) 1899 code[1] |= 0x4; 1900 1901 emitPredicate(i); 1902 1903 assert(i->src(1).getFile() == FILE_GPR); 1904 1905 srcId(i->src(0).getIndirect(0), 10); 1906 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address 1907 srcId(i->src(1), 2); 1908} 1909 1910void 1911CodeEmitterGK110::emitOUT(const Instruction *i) 1912{ 1913 assert(i->src(0).getFile() == FILE_GPR); 1914 1915 emitForm_21(i, 0x1f0, 0xb70); 1916 1917 if (i->op == OP_EMIT) 1918 code[1] |= 1 << 10; 1919 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART) 1920 code[1] |= 1 << 11; 1921} 1922 1923void 1924CodeEmitterGK110::emitInterpMode(const Instruction *i) 1925{ 1926 code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID 1927 code[1] |= (i->ipa & 0xc) << (19 - 2); 1928} 1929 1930static void 1931interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) 1932{ 1933 int ipa = entry->ipa; 1934 int reg = entry->reg; 1935 int loc = entry->loc; 1936 1937 if (data.flatshade && 1938 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { 1939 ipa = NV50_IR_INTERP_FLAT; 1940 reg = 0xff; 1941 } else if (data.force_persample_interp && 1942 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && 1943 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { 1944 ipa |= NV50_IR_INTERP_CENTROID; 1945 } 1946 code[loc + 1] &= ~(0xf << 19); 1947 code[loc + 1] |= (ipa & 0x3) << 21; 1948 code[loc + 1] |= (ipa & 0xc) << (19 - 2); 1949 code[loc + 0] &= ~(0xff << 23); 1950 code[loc + 0] |= reg << 23; 1951} 1952 1953void 1954CodeEmitterGK110::emitINTERP(const Instruction *i) 1955{ 1956 const uint32_t base = i->getSrc(0)->reg.data.offset; 1957 1958 code[0] = 0x00000002 | (base << 31); 1959 code[1] = 0x74800000 | (base >> 1); 1960 1961 if (i->saturate) 1962 code[1] |= 1 << 18; 1963 1964 if (i->op == OP_PINTERP) { 1965 srcId(i->src(1), 23); 1966 addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); 1967 } else { 1968 code[0] |= 0xff << 23; 1969 addInterp(i->ipa, 0xff, interpApply); 1970 } 1971 1972 srcId(i->src(0).getIndirect(0), 10); 1973 emitInterpMode(i); 1974 1975 emitPredicate(i); 1976 defId(i->def(0), 2); 1977 1978 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET) 1979 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10); 1980 else 1981 code[1] |= 0xff << 10; 1982} 1983 1984void 1985CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos) 1986{ 1987 uint8_t n; 1988 1989 switch (ty) { 1990 case TYPE_U8: 1991 n = 0; 1992 break; 1993 case TYPE_S8: 1994 n = 1; 1995 break; 1996 case TYPE_U16: 1997 n = 2; 1998 break; 1999 case TYPE_S16: 2000 n = 3; 2001 break; 2002 case TYPE_F32: 2003 case TYPE_U32: 2004 case TYPE_S32: 2005 n = 4; 2006 break; 2007 case TYPE_F64: 2008 case TYPE_U64: 2009 case TYPE_S64: 2010 n = 5; 2011 break; 2012 case TYPE_B128: 2013 n = 6; 2014 break; 2015 default: 2016 n = 0; 2017 assert(!"invalid ld/st type"); 2018 break; 2019 } 2020 code[pos / 32] |= n << (pos % 32); 2021} 2022 2023void 2024CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos) 2025{ 2026 uint8_t n; 2027 2028 switch (c) { 2029 case CACHE_CA: 2030// case CACHE_WB: 2031 n = 0; 2032 break; 2033 case CACHE_CG: 2034 n = 1; 2035 break; 2036 case CACHE_CS: 2037 n = 2; 2038 break; 2039 case CACHE_CV: 2040// case CACHE_WT: 2041 n = 3; 2042 break; 2043 default: 2044 n = 0; 2045 assert(!"invalid caching mode"); 2046 break; 2047 } 2048 code[pos / 32] |= n << (pos % 32); 2049} 2050 2051void 2052CodeEmitterGK110::emitSTORE(const Instruction *i) 2053{ 2054 int32_t offset = SDATA(i->src(0)).offset; 2055 2056 switch (i->src(0).getFile()) { 2057 case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break; 2058 case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; 2059 case FILE_MEMORY_SHARED: 2060 code[0] = 0x00000002; 2061 if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) 2062 code[1] = 0x78400000; 2063 else 2064 code[1] = 0x7ac00000; 2065 break; 2066 default: 2067 assert(!"invalid memory file"); 2068 break; 2069 } 2070 2071 if (code[0] & 0x2) { 2072 offset &= 0xffffff; 2073 emitLoadStoreType(i->dType, 0x33); 2074 if (i->src(0).getFile() == FILE_MEMORY_LOCAL) 2075 emitCachingMode(i->cache, 0x2f); 2076 } else { 2077 emitLoadStoreType(i->dType, 0x38); 2078 emitCachingMode(i->cache, 0x3b); 2079 } 2080 code[0] |= offset << 23; 2081 code[1] |= offset >> 9; 2082 2083 // Unlocked store on shared memory can fail. 2084 if (i->src(0).getFile() == FILE_MEMORY_SHARED && 2085 i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { 2086 assert(i->defExists(0)); 2087 defId(i->def(0), 32 + 16); 2088 } 2089 2090 emitPredicate(i); 2091 2092 srcId(i->src(1), 2); 2093 srcId(i->src(0).getIndirect(0), 10); 2094 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL && 2095 i->src(0).isIndirect(0) && 2096 i->getIndirect(0, 0)->reg.size == 8) 2097 code[1] |= 1 << 23; 2098} 2099 2100void 2101CodeEmitterGK110::emitLOAD(const Instruction *i) 2102{ 2103 int32_t offset = SDATA(i->src(0)).offset; 2104 2105 switch (i->src(0).getFile()) { 2106 case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break; 2107 case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break; 2108 case FILE_MEMORY_SHARED: 2109 code[0] = 0x00000002; 2110 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) 2111 code[1] = 0x77400000; 2112 else 2113 code[1] = 0x7a400000; 2114 break; 2115 case FILE_MEMORY_CONST: 2116 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { 2117 emitMOV(i); 2118 return; 2119 } 2120 offset &= 0xffff; 2121 code[0] = 0x00000002; 2122 code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7); 2123 code[1] |= i->subOp << 15; 2124 break; 2125 default: 2126 assert(!"invalid memory file"); 2127 break; 2128 } 2129 2130 if (code[0] & 0x2) { 2131 offset &= 0xffffff; 2132 emitLoadStoreType(i->dType, 0x33); 2133 if (i->src(0).getFile() == FILE_MEMORY_LOCAL) 2134 emitCachingMode(i->cache, 0x2f); 2135 } else { 2136 emitLoadStoreType(i->dType, 0x38); 2137 emitCachingMode(i->cache, 0x3b); 2138 } 2139 code[0] |= offset << 23; 2140 code[1] |= offset >> 9; 2141 2142 // Locked store on shared memory can fail. 2143 int r = 0, p = -1; 2144 if (i->src(0).getFile() == FILE_MEMORY_SHARED && 2145 i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { 2146 if (i->def(0).getFile() == FILE_PREDICATE) { // p, # 2147 r = -1; 2148 p = 0; 2149 } else if (i->defExists(1)) { // r, p 2150 p = 1; 2151 } else { 2152 assert(!"Expected predicate dest for load locked"); 2153 } 2154 } 2155 2156 emitPredicate(i); 2157 2158 if (r >= 0) 2159 defId(i->def(r), 2); 2160 else 2161 code[0] |= 255 << 2; 2162 2163 if (p >= 0) 2164 defId(i->def(p), 32 + 16); 2165 2166 if (i->getIndirect(0, 0)) { 2167 srcId(i->src(0).getIndirect(0), 10); 2168 if (i->getIndirect(0, 0)->reg.size == 8) 2169 code[1] |= 1 << 23; 2170 } else { 2171 code[0] |= 255 << 10; 2172 } 2173} 2174 2175uint8_t 2176CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) 2177{ 2178 switch (SDATA(ref).sv.sv) { 2179 case SV_LANEID: return 0x00; 2180 case SV_PHYSID: return 0x03; 2181 case SV_VERTEX_COUNT: return 0x10; 2182 case SV_INVOCATION_ID: return 0x11; 2183 case SV_YDIR: return 0x12; 2184 case SV_THREAD_KILL: return 0x13; 2185 case SV_TID: return 0x21 + SDATA(ref).sv.index; 2186 case SV_CTAID: return 0x25 + SDATA(ref).sv.index; 2187 case SV_NTID: return 0x29 + SDATA(ref).sv.index; 2188 case SV_GRIDID: return 0x2c; 2189 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; 2190 case SV_LBASE: return 0x34; 2191 case SV_SBASE: return 0x30; 2192 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; 2193 default: 2194 assert(!"no sreg for system value"); 2195 return 0; 2196 } 2197} 2198 2199void 2200CodeEmitterGK110::emitMOV(const Instruction *i) 2201{ 2202 if (i->def(0).getFile() == FILE_PREDICATE) { 2203 if (i->src(0).getFile() == FILE_GPR) { 2204 // Use ISETP.NE.AND dst, PT, src, RZ, PT 2205 code[0] = 0x00000002; 2206 code[1] = 0xdb500000; 2207 2208 code[0] |= 0x7 << 2; 2209 code[0] |= 0xff << 23; 2210 code[1] |= 0x7 << 10; 2211 srcId(i->src(0), 10); 2212 } else 2213 if (i->src(0).getFile() == FILE_PREDICATE) { 2214 // Use PSETP.AND.AND dst, PT, src, PT, PT 2215 code[0] = 0x00000002; 2216 code[1] = 0x84800000; 2217 2218 code[0] |= 0x7 << 2; 2219 code[1] |= 0x7 << 0; 2220 code[1] |= 0x7 << 10; 2221 2222 srcId(i->src(0), 14); 2223 } else { 2224 assert(!"Unexpected source for predicate destination"); 2225 emitNOP(i); 2226 } 2227 emitPredicate(i); 2228 defId(i->def(0), 5); 2229 } else 2230 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { 2231 code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23); 2232 code[1] = 0x86400000; 2233 emitPredicate(i); 2234 defId(i->def(0), 2); 2235 } else 2236 if (i->src(0).getFile() == FILE_IMMEDIATE) { 2237 code[0] = 0x00000002 | (i->lanes << 14); 2238 code[1] = 0x74000000; 2239 emitPredicate(i); 2240 defId(i->def(0), 2); 2241 setImmediate32(i, 0, Modifier(0)); 2242 } else 2243 if (i->src(0).getFile() == FILE_PREDICATE) { 2244 code[0] = 0x00000002; 2245 code[1] = 0x84401c07; 2246 emitPredicate(i); 2247 defId(i->def(0), 2); 2248 srcId(i->src(0), 14); 2249 } else { 2250 emitForm_C(i, 0x24c, 2); 2251 code[1] |= i->lanes << 10; 2252 } 2253} 2254 2255static inline bool 2256uses64bitAddress(const Instruction *ldst) 2257{ 2258 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL && 2259 ldst->src(0).isIndirect(0) && 2260 ldst->getIndirect(0, 0)->reg.size == 8; 2261} 2262 2263void 2264CodeEmitterGK110::emitATOM(const Instruction *i) 2265{ 2266 const bool hasDst = i->defExists(0); 2267 const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH; 2268 2269 code[0] = 0x00000002; 2270 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) 2271 code[1] = 0x77800000; 2272 else 2273 code[1] = 0x68000000; 2274 2275 switch (i->subOp) { 2276 case NV50_IR_SUBOP_ATOM_CAS: break; 2277 case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break; 2278 default: code[1] |= i->subOp << 23; break; 2279 } 2280 2281 switch (i->dType) { 2282 case TYPE_U32: break; 2283 case TYPE_S32: code[1] |= 0x00100000; break; 2284 case TYPE_U64: code[1] |= 0x00200000; break; 2285 case TYPE_F32: code[1] |= 0x00300000; break; 2286 case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */ 2287 case TYPE_S64: code[1] |= 0x00500000; break; 2288 default: assert(!"unsupported type"); break; 2289 } 2290 2291 emitPredicate(i); 2292 2293 /* TODO: cas: check that src regs line up */ 2294 /* TODO: cas: flip bits if $r255 is used */ 2295 srcId(i->src(1), 23); 2296 2297 if (hasDst) { 2298 defId(i->def(0), 2); 2299 } else 2300 if (!exch) { 2301 code[0] |= 255 << 2; 2302 } 2303 2304 if (hasDst || !exch) { 2305 const int32_t offset = SDATA(i->src(0)).offset; 2306 assert(offset < 0x80000 && offset >= -0x80000); 2307 code[0] |= (offset & 1) << 31; 2308 code[1] |= (offset & 0xffffe) >> 1; 2309 } else { 2310 srcAddr32(i->src(0), 31); 2311 } 2312 2313 if (i->getIndirect(0, 0)) { 2314 srcId(i->getIndirect(0, 0), 10); 2315 if (i->getIndirect(0, 0)->reg.size == 8) 2316 code[1] |= 1 << 19; 2317 } else { 2318 code[0] |= 255 << 10; 2319 } 2320} 2321 2322void 2323CodeEmitterGK110::emitCCTL(const Instruction *i) 2324{ 2325 int32_t offset = SDATA(i->src(0)).offset; 2326 2327 code[0] = 0x00000002 | (i->subOp << 2); 2328 2329 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { 2330 code[1] = 0x7b000000; 2331 } else { 2332 code[1] = 0x7c000000; 2333 offset &= 0xffffff; 2334 } 2335 code[0] |= offset << 23; 2336 code[1] |= offset >> 9; 2337 2338 if (uses64bitAddress(i)) 2339 code[1] |= 1 << 23; 2340 srcId(i->src(0).getIndirect(0), 10); 2341 2342 emitPredicate(i); 2343} 2344 2345bool 2346CodeEmitterGK110::emitInstruction(Instruction *insn) 2347{ 2348 const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8; 2349 2350 if (insn->encSize != 8) { 2351 ERROR("skipping unencodable instruction: "); 2352 insn->print(); 2353 return false; 2354 } else 2355 if (codeSize + size > codeSizeLimit) { 2356 ERROR("code emitter output buffer too small\n"); 2357 return false; 2358 } 2359 2360 if (writeIssueDelays) { 2361 int id = (codeSize & 0x3f) / 8 - 1; 2362 if (id < 0) { 2363 id += 1; 2364 code[0] = 0x00000000; // cf issue delay "instruction" 2365 code[1] = 0x08000000; 2366 code += 2; 2367 codeSize += 8; 2368 } 2369 uint32_t *data = code - (id * 2 + 2); 2370 2371 switch (id) { 2372 case 0: data[0] |= insn->sched << 2; break; 2373 case 1: data[0] |= insn->sched << 10; break; 2374 case 2: data[0] |= insn->sched << 18; break; 2375 case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break; 2376 case 4: data[1] |= insn->sched << 2; break; 2377 case 5: data[1] |= insn->sched << 10; break; 2378 case 6: data[1] |= insn->sched << 18; break; 2379 default: 2380 assert(0); 2381 break; 2382 } 2383 } 2384 2385 // assert that instructions with multiple defs don't corrupt registers 2386 for (int d = 0; insn->defExists(d); ++d) 2387 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); 2388 2389 switch (insn->op) { 2390 case OP_MOV: 2391 case OP_RDSV: 2392 emitMOV(insn); 2393 break; 2394 case OP_NOP: 2395 break; 2396 case OP_LOAD: 2397 emitLOAD(insn); 2398 break; 2399 case OP_STORE: 2400 emitSTORE(insn); 2401 break; 2402 case OP_LINTERP: 2403 case OP_PINTERP: 2404 emitINTERP(insn); 2405 break; 2406 case OP_VFETCH: 2407 emitVFETCH(insn); 2408 break; 2409 case OP_EXPORT: 2410 emitEXPORT(insn); 2411 break; 2412 case OP_AFETCH: 2413 emitAFETCH(insn); 2414 break; 2415 case OP_PFETCH: 2416 emitPFETCH(insn); 2417 break; 2418 case OP_EMIT: 2419 case OP_RESTART: 2420 emitOUT(insn); 2421 break; 2422 case OP_ADD: 2423 case OP_SUB: 2424 if (insn->dType == TYPE_F64) 2425 emitDADD(insn); 2426 else if (isFloatType(insn->dType)) 2427 emitFADD(insn); 2428 else 2429 emitUADD(insn); 2430 break; 2431 case OP_MUL: 2432 if (insn->dType == TYPE_F64) 2433 emitDMUL(insn); 2434 else if (isFloatType(insn->dType)) 2435 emitFMUL(insn); 2436 else 2437 emitIMUL(insn); 2438 break; 2439 case OP_MAD: 2440 case OP_FMA: 2441 if (insn->dType == TYPE_F64) 2442 emitDMAD(insn); 2443 else if (isFloatType(insn->dType)) 2444 emitFMAD(insn); 2445 else 2446 emitIMAD(insn); 2447 break; 2448 case OP_MADSP: 2449 emitMADSP(insn); 2450 break; 2451 case OP_SAD: 2452 emitISAD(insn); 2453 break; 2454 case OP_SHLADD: 2455 emitSHLADD(insn); 2456 break; 2457 case OP_NOT: 2458 emitNOT(insn); 2459 break; 2460 case OP_AND: 2461 emitLogicOp(insn, 0); 2462 break; 2463 case OP_OR: 2464 emitLogicOp(insn, 1); 2465 break; 2466 case OP_XOR: 2467 emitLogicOp(insn, 2); 2468 break; 2469 case OP_SHL: 2470 case OP_SHR: 2471 emitShift(insn); 2472 break; 2473 case OP_SET: 2474 case OP_SET_AND: 2475 case OP_SET_OR: 2476 case OP_SET_XOR: 2477 emitSET(insn->asCmp()); 2478 break; 2479 case OP_SELP: 2480 emitSELP(insn); 2481 break; 2482 case OP_SLCT: 2483 emitSLCT(insn->asCmp()); 2484 break; 2485 case OP_MIN: 2486 case OP_MAX: 2487 emitMINMAX(insn); 2488 break; 2489 case OP_ABS: 2490 case OP_NEG: 2491 case OP_CEIL: 2492 case OP_FLOOR: 2493 case OP_TRUNC: 2494 case OP_SAT: 2495 emitCVT(insn); 2496 break; 2497 case OP_CVT: 2498 if (insn->def(0).getFile() == FILE_PREDICATE || 2499 insn->src(0).getFile() == FILE_PREDICATE) 2500 emitMOV(insn); 2501 else 2502 emitCVT(insn); 2503 break; 2504 case OP_RSQ: 2505 emitSFnOp(insn, 5 + 2 * insn->subOp); 2506 break; 2507 case OP_RCP: 2508 emitSFnOp(insn, 4 + 2 * insn->subOp); 2509 break; 2510 case OP_LG2: 2511 emitSFnOp(insn, 3); 2512 break; 2513 case OP_EX2: 2514 emitSFnOp(insn, 2); 2515 break; 2516 case OP_SIN: 2517 emitSFnOp(insn, 1); 2518 break; 2519 case OP_COS: 2520 emitSFnOp(insn, 0); 2521 break; 2522 case OP_PRESIN: 2523 case OP_PREEX2: 2524 emitPreOp(insn); 2525 break; 2526 case OP_TEX: 2527 case OP_TXB: 2528 case OP_TXL: 2529 case OP_TXD: 2530 case OP_TXF: 2531 case OP_TXG: 2532 case OP_TXLQ: 2533 emitTEX(insn->asTex()); 2534 break; 2535 case OP_TXQ: 2536 emitTXQ(insn->asTex()); 2537 break; 2538 case OP_TEXBAR: 2539 emitTEXBAR(insn); 2540 break; 2541 case OP_PIXLD: 2542 emitPIXLD(insn); 2543 break; 2544 case OP_BRA: 2545 case OP_CALL: 2546 case OP_PRERET: 2547 case OP_RET: 2548 case OP_DISCARD: 2549 case OP_EXIT: 2550 case OP_PRECONT: 2551 case OP_CONT: 2552 case OP_PREBREAK: 2553 case OP_BREAK: 2554 case OP_JOINAT: 2555 case OP_BRKPT: 2556 case OP_QUADON: 2557 case OP_QUADPOP: 2558 emitFlow(insn); 2559 break; 2560 case OP_QUADOP: 2561 emitQUADOP(insn, insn->subOp, insn->lanes); 2562 break; 2563 case OP_DFDX: 2564 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); 2565 break; 2566 case OP_DFDY: 2567 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5); 2568 break; 2569 case OP_POPCNT: 2570 emitPOPC(insn); 2571 break; 2572 case OP_INSBF: 2573 emitINSBF(insn); 2574 break; 2575 case OP_EXTBF: 2576 emitEXTBF(insn); 2577 break; 2578 case OP_BFIND: 2579 emitBFIND(insn); 2580 break; 2581 case OP_PERMT: 2582 emitPERMT(insn); 2583 break; 2584 case OP_JOIN: 2585 emitNOP(insn); 2586 insn->join = 1; 2587 break; 2588 case OP_BAR: 2589 emitBAR(insn); 2590 break; 2591 case OP_MEMBAR: 2592 emitMEMBAR(insn); 2593 break; 2594 case OP_ATOM: 2595 emitATOM(insn); 2596 break; 2597 case OP_CCTL: 2598 emitCCTL(insn); 2599 break; 2600 case OP_VOTE: 2601 emitVOTE(insn); 2602 break; 2603 case OP_SULDB: 2604 emitSULDGB(insn->asTex()); 2605 break; 2606 case OP_SUSTB: 2607 case OP_SUSTP: 2608 emitSUSTGx(insn->asTex()); 2609 break; 2610 case OP_SUBFM: 2611 case OP_SUCLAMP: 2612 case OP_SUEAU: 2613 emitSUCalc(insn); 2614 break; 2615 case OP_VSHL: 2616 emitVSHL(insn); 2617 break; 2618 case OP_PHI: 2619 case OP_UNION: 2620 case OP_CONSTRAINT: 2621 ERROR("operation should have been eliminated"); 2622 return false; 2623 case OP_EXP: 2624 case OP_LOG: 2625 case OP_SQRT: 2626 case OP_POW: 2627 ERROR("operation should have been lowered\n"); 2628 return false; 2629 default: 2630 ERROR("unknown op: %u\n", insn->op); 2631 return false; 2632 } 2633 2634 if (insn->join) 2635 code[0] |= 1 << 22; 2636 2637 code += 2; 2638 codeSize += 8; 2639 return true; 2640} 2641 2642uint32_t 2643CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const 2644{ 2645 // No more short instruction encodings. 2646 return 8; 2647} 2648 2649void 2650CodeEmitterGK110::prepareEmission(Function *func) 2651{ 2652 const Target *targ = func->getProgram()->getTarget(); 2653 2654 CodeEmitter::prepareEmission(func); 2655 2656 if (targ->hasSWSched) 2657 calculateSchedDataNVC0(targ, func); 2658} 2659 2660CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target) 2661 : CodeEmitter(target), 2662 targNVC0(target), 2663 writeIssueDelays(target->hasSWSched) 2664{ 2665 code = NULL; 2666 codeSize = codeSizeLimit = 0; 2667 relocInfo = NULL; 2668} 2669 2670CodeEmitter * 2671TargetNVC0::createCodeEmitterGK110(Program::Type type) 2672{ 2673 CodeEmitterGK110 *emit = new CodeEmitterGK110(this); 2674 emit->setProgramType(type); 2675 return emit; 2676} 2677 2678} // namespace nv50_ir 2679