1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "codegen/nv50_ir_target_nvc0.h"
24
25namespace nv50_ir {
26
27// Argh, all these assertions ...
28
29class CodeEmitterNVC0 : public CodeEmitter
30{
31public:
32   CodeEmitterNVC0(const TargetNVC0 *);
33
34   virtual bool emitInstruction(Instruction *);
35   virtual uint32_t getMinEncodingSize(const Instruction *) const;
36   virtual void prepareEmission(Function *);
37
38   inline void setProgramType(Program::Type pType) { progType = pType; }
39
40private:
41   const TargetNVC0 *targNVC0;
42
43   Program::Type progType;
44
45   const bool writeIssueDelays;
46
47private:
48   void emitForm_A(const Instruction *, uint64_t);
49   void emitForm_B(const Instruction *, uint64_t);
50   void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52   void emitPredicate(const Instruction *);
53
54   void setAddress16(const ValueRef&);
55   void setAddress24(const ValueRef&);
56   void setAddressByFile(const ValueRef&);
57   void setImmediate(const Instruction *, const int s); // needs op already set
58   void setImmediateS8(const ValueRef&);
59   void setSUConst16(const Instruction *, const int s);
60   void setSUPred(const Instruction *, const int s);
61
62   void emitCondCode(CondCode cc, int pos);
63   void emitInterpMode(const Instruction *);
64   void emitLoadStoreType(DataType ty);
65   void emitSUGType(DataType);
66   void emitSUAddr(const TexInstruction *);
67   void emitSUDim(const TexInstruction *);
68   void emitCachingMode(CacheMode c);
69
70   void emitShortSrc2(const ValueRef&);
71
72   inline uint8_t getSRegEncoding(const ValueRef&);
73
74   void roundMode_A(const Instruction *);
75   void roundMode_C(const Instruction *);
76   void roundMode_CS(const Instruction *);
77
78   void emitNegAbs12(const Instruction *);
79
80   void emitNOP(const Instruction *);
81
82   void emitLOAD(const Instruction *);
83   void emitSTORE(const Instruction *);
84   void emitMOV(const Instruction *);
85   void emitATOM(const Instruction *);
86   void emitMEMBAR(const Instruction *);
87   void emitCCTL(const Instruction *);
88
89   void emitINTERP(const Instruction *);
90   void emitAFETCH(const Instruction *);
91   void emitPFETCH(const Instruction *);
92   void emitVFETCH(const Instruction *);
93   void emitEXPORT(const Instruction *);
94   void emitOUT(const Instruction *);
95
96   void emitUADD(const Instruction *);
97   void emitFADD(const Instruction *);
98   void emitDADD(const Instruction *);
99   void emitUMUL(const Instruction *);
100   void emitFMUL(const Instruction *);
101   void emitDMUL(const Instruction *);
102   void emitIMAD(const Instruction *);
103   void emitISAD(const Instruction *);
104   void emitSHLADD(const Instruction *a);
105   void emitFMAD(const Instruction *);
106   void emitDMAD(const Instruction *);
107   void emitMADSP(const Instruction *);
108
109   void emitNOT(Instruction *);
110   void emitLogicOp(const Instruction *, uint8_t subOp);
111   void emitPOPC(const Instruction *);
112   void emitINSBF(const Instruction *);
113   void emitEXTBF(const Instruction *);
114   void emitBFIND(const Instruction *);
115   void emitPERMT(const Instruction *);
116   void emitShift(const Instruction *);
117
118   void emitSFnOp(const Instruction *, uint8_t subOp);
119
120   void emitCVT(Instruction *);
121   void emitMINMAX(const Instruction *);
122   void emitPreOp(const Instruction *);
123
124   void emitSET(const CmpInstruction *);
125   void emitSLCT(const CmpInstruction *);
126   void emitSELP(const Instruction *);
127
128   void emitTEXBAR(const Instruction *);
129   void emitTEX(const TexInstruction *);
130   void emitTEXCSAA(const TexInstruction *);
131   void emitTXQ(const TexInstruction *);
132
133   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
134
135   void emitFlow(const Instruction *);
136   void emitBAR(const Instruction *);
137
138   void emitSUCLAMPMode(uint16_t);
139   void emitSUCalc(Instruction *);
140   void emitSULDGB(const TexInstruction *);
141   void emitSUSTGx(const TexInstruction *);
142
143   void emitSULDB(const TexInstruction *);
144   void emitSUSTx(const TexInstruction *);
145   void emitSULEA(const TexInstruction *);
146
147   void emitVSHL(const Instruction *);
148   void emitVectorSubOp(const Instruction *);
149
150   void emitPIXLD(const Instruction *);
151
152   void emitVOTE(const Instruction *);
153
154   inline void defId(const ValueDef&, const int pos);
155   inline void defId(const Instruction *, int d, const int pos);
156   inline void srcId(const ValueRef&, const int pos);
157   inline void srcId(const ValueRef *, const int pos);
158   inline void srcId(const Instruction *, int s, const int pos);
159   inline void srcAddr32(const ValueRef&, int pos, int shr);
160
161   inline bool isLIMM(const ValueRef&, DataType ty);
162};
163
// Readability helper: HEX64(h, l) pastes the hex digits of the high half and
// the low half of an opcode into a single literal 0x<h><l>ULL.
#define HEX64(h, l) 0x##h##l##ULL

// Both accessors yield the reg.data of the value returned by rep(); SDATA is
// used on source references, DDATA on definitions.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) SDATA(a)
169
170void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
171{
172   code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
173}
174
175void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
176{
177   code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
178}
179
180void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
181{
182   int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
183   code[pos / 32] |= r << (pos % 32);
184}
185
186void
187CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
188{
189   const uint32_t offset = SDATA(src).offset >> shr;
190
191   code[pos / 32] |= offset << (pos % 32);
192   if (pos && (pos < 32))
193      code[1] |= offset >> (32 - pos);
194}
195
196void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
197{
198   code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
199}
200
201void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
202{
203   int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
204   code[pos / 32] |= r << (pos % 32);
205}
206
207bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
208{
209   const ImmediateValue *imm = ref.get()->asImm();
210
211   return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
212}
213
214void
215CodeEmitterNVC0::roundMode_A(const Instruction *insn)
216{
217   switch (insn->rnd) {
218   case ROUND_M: code[1] |= 1 << 23; break;
219   case ROUND_P: code[1] |= 2 << 23; break;
220   case ROUND_Z: code[1] |= 3 << 23; break;
221   default:
222      assert(insn->rnd == ROUND_N);
223      break;
224   }
225}
226
227void
228CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
229{
230   if (i->src(1).mod.abs()) code[0] |= 1 << 6;
231   if (i->src(0).mod.abs()) code[0] |= 1 << 7;
232   if (i->src(1).mod.neg()) code[0] |= 1 << 8;
233   if (i->src(0).mod.neg()) code[0] |= 1 << 9;
234}
235
236void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
237{
238   uint8_t val;
239
240   switch (cc) {
241   case CC_LT:  val = 0x1; break;
242   case CC_LTU: val = 0x9; break;
243   case CC_EQ:  val = 0x2; break;
244   case CC_EQU: val = 0xa; break;
245   case CC_LE:  val = 0x3; break;
246   case CC_LEU: val = 0xb; break;
247   case CC_GT:  val = 0x4; break;
248   case CC_GTU: val = 0xc; break;
249   case CC_NE:  val = 0x5; break;
250   case CC_NEU: val = 0xd; break;
251   case CC_GE:  val = 0x6; break;
252   case CC_GEU: val = 0xe; break;
253   case CC_TR:  val = 0xf; break;
254   case CC_FL:  val = 0x0; break;
255
256   case CC_A:  val = 0x14; break;
257   case CC_NA: val = 0x13; break;
258   case CC_S:  val = 0x15; break;
259   case CC_NS: val = 0x12; break;
260   case CC_C:  val = 0x16; break;
261   case CC_NC: val = 0x11; break;
262   case CC_O:  val = 0x17; break;
263   case CC_NO: val = 0x10; break;
264
265   default:
266      val = 0;
267      assert(!"invalid condition code");
268      break;
269   }
270   code[pos / 32] |= val << (pos % 32);
271}
272
273void
274CodeEmitterNVC0::emitPredicate(const Instruction *i)
275{
276   if (i->predSrc >= 0) {
277      assert(i->getPredicate()->reg.file == FILE_PREDICATE);
278      srcId(i->src(i->predSrc), 10);
279      if (i->cc == CC_NOT_P)
280         code[0] |= 0x2000; // negate
281   } else {
282      code[0] |= 0x1c00;
283   }
284}
285
286void
287CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
288{
289   switch (src.getFile()) {
290   case FILE_MEMORY_GLOBAL:
291      srcAddr32(src, 26, 0);
292      break;
293   case FILE_MEMORY_LOCAL:
294   case FILE_MEMORY_SHARED:
295      setAddress24(src);
296      break;
297   default:
298      assert(src.getFile() == FILE_MEMORY_CONST);
299      setAddress16(src);
300      break;
301   }
302}
303
304void
305CodeEmitterNVC0::setAddress16(const ValueRef& src)
306{
307   Symbol *sym = src.get()->asSym();
308
309   assert(sym);
310
311   code[0] |= (sym->reg.data.offset & 0x003f) << 26;
312   code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
313}
314
315void
316CodeEmitterNVC0::setAddress24(const ValueRef& src)
317{
318   Symbol *sym = src.get()->asSym();
319
320   assert(sym);
321
322   code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
323   code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
324}
325
326void
327CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
328{
329   const ImmediateValue *imm = i->src(s).get()->asImm();
330   uint32_t u32;
331
332   assert(imm);
333   u32 = imm->reg.data.u32;
334
335   if ((code[0] & 0xf) == 0x1) {
336      // double immediate
337      uint64_t u64 = imm->reg.data.u64;
338      assert(!(u64 & 0x00000fffffffffffULL));
339      assert(!(code[1] & 0xc000));
340      code[0] |= ((u64 >> 44) & 0x3f) << 26;
341      code[1] |= 0xc000 | (u64 >> 50);
342   } else
343   if ((code[0] & 0xf) == 0x2) {
344      // LIMM
345      code[0] |= (u32 & 0x3f) << 26;
346      code[1] |= u32 >> 6;
347   } else
348   if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
349      // integer immediate
350      assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
351      assert(!(code[1] & 0xc000));
352      u32 &= 0xfffff;
353      code[0] |= (u32 & 0x3f) << 26;
354      code[1] |= 0xc000 | (u32 >> 6);
355   } else {
356      // float immediate
357      assert(!(u32 & 0x00000fff));
358      assert(!(code[1] & 0xc000));
359      code[0] |= ((u32 >> 12) & 0x3f) << 26;
360      code[1] |= 0xc000 | (u32 >> 18);
361   }
362}
363
364void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
365{
366   const ImmediateValue *imm = ref.get()->asImm();
367
368   int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
369
370   assert(s8 == imm->reg.data.s32);
371
372   code[0] |= (s8 & 0x3f) << 26;
373   code[0] |= (s8 >> 6) << 8;
374}
375
376void
377CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
378{
379   code[0] = opc;
380   code[1] = opc >> 32;
381
382   emitPredicate(i);
383
384   defId(i->def(0), 14);
385
386   int s1 = 26;
387   if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
388      s1 = 49;
389
390   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
391      switch (i->getSrc(s)->reg.file) {
392      case FILE_MEMORY_CONST:
393         assert(!(code[1] & 0xc000));
394         code[1] |= (s == 2) ? 0x8000 : 0x4000;
395         code[1] |= i->getSrc(s)->reg.fileIndex << 10;
396         setAddress16(i->src(s));
397         break;
398      case FILE_IMMEDIATE:
399         assert(s == 1 ||
400                i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
401         assert(!(code[1] & 0xc000));
402         setImmediate(i, s);
403         break;
404      case FILE_GPR:
405         if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
406            break;
407         srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
408         break;
409      default:
410         if (i->op == OP_SELP) {
411            // OP_SELP is used to implement shared+atomics on Fermi.
412            assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
413            srcId(i->src(s), 49);
414         }
415         // ignore here, can be predicate or flags, but must not be address
416         break;
417      }
418   }
419}
420
421void
422CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
423{
424   code[0] = opc;
425   code[1] = opc >> 32;
426
427   emitPredicate(i);
428
429   defId(i->def(0), 14);
430
431   switch (i->src(0).getFile()) {
432   case FILE_MEMORY_CONST:
433      assert(!(code[1] & 0xc000));
434      code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
435      setAddress16(i->src(0));
436      break;
437   case FILE_IMMEDIATE:
438      assert(!(code[1] & 0xc000));
439      setImmediate(i, 0);
440      break;
441   case FILE_GPR:
442      srcId(i->src(0), 26);
443      break;
444   default:
445      // ignore here, can be predicate or flags, but must not be address
446      break;
447   }
448}
449
450void
451CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
452{
453   code[0] = opc;
454
455   int ss2a = 0;
456   if (opc == 0x0d || opc == 0x0e)
457      ss2a = 2;
458
459   defId(i->def(0), 14);
460   srcId(i->src(0), 20);
461
462   assert(pred || (i->predSrc < 0));
463   if (pred)
464      emitPredicate(i);
465
466   for (int s = 1; s < 3 && i->srcExists(s); ++s) {
467      if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
468         assert(!(code[0] & (0x300 >> ss2a)));
469         switch (i->src(s).get()->reg.fileIndex) {
470         case 0:  code[0] |= 0x100 >> ss2a; break;
471         case 1:  code[0] |= 0x200 >> ss2a; break;
472         case 16: code[0] |= 0x300 >> ss2a; break;
473         default:
474            ERROR("invalid c[] space for short form\n");
475            break;
476         }
477         if (s == 1)
478            code[0] |= i->getSrc(s)->reg.data.offset << 24;
479         else
480            code[0] |= i->getSrc(s)->reg.data.offset << 6;
481      } else
482      if (i->src(s).getFile() == FILE_IMMEDIATE) {
483         assert(s == 1);
484         setImmediateS8(i->src(s));
485      } else
486      if (i->src(s).getFile() == FILE_GPR) {
487         srcId(i->src(s), (s == 1) ? 26 : 8);
488      }
489   }
490}
491
492void
493CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
494{
495   if (src.getFile() == FILE_MEMORY_CONST) {
496      switch (src.get()->reg.fileIndex) {
497      case 0:  code[0] |= 0x100; break;
498      case 1:  code[0] |= 0x200; break;
499      case 16: code[0] |= 0x300; break;
500      default:
501         assert(!"unsupported file index for short op");
502         break;
503      }
504      srcAddr32(src, 20, 2);
505   } else {
506      srcId(src, 20);
507      assert(src.getFile() == FILE_GPR);
508   }
509}
510
511void
512CodeEmitterNVC0::emitNOP(const Instruction *i)
513{
514   code[0] = 0x000001e4;
515   code[1] = 0x40000000;
516   emitPredicate(i);
517}
518
519void
520CodeEmitterNVC0::emitFMAD(const Instruction *i)
521{
522   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
523
524   if (i->encSize == 8) {
525      if (isLIMM(i->src(1), TYPE_F32)) {
526         emitForm_A(i, HEX64(20000000, 00000002));
527      } else {
528         emitForm_A(i, HEX64(30000000, 00000000));
529
530         if (i->src(2).mod.neg())
531            code[0] |= 1 << 8;
532      }
533      roundMode_A(i);
534
535      if (neg1)
536         code[0] |= 1 << 9;
537
538      if (i->saturate)
539         code[0] |= 1 << 5;
540
541      if (i->dnz)
542         code[0] |= 1 << 7;
543      else
544      if (i->ftz)
545         code[0] |= 1 << 6;
546   } else {
547      assert(!i->saturate && !i->src(2).mod.neg());
548      emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
549                 false);
550      if (neg1)
551         code[0] |= 1 << 4;
552   }
553}
554
555void
556CodeEmitterNVC0::emitDMAD(const Instruction *i)
557{
558   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
559
560   emitForm_A(i, HEX64(20000000, 00000001));
561
562   if (i->src(2).mod.neg())
563      code[0] |= 1 << 8;
564
565   roundMode_A(i);
566
567   if (neg1)
568      code[0] |= 1 << 9;
569
570   assert(!i->saturate);
571   assert(!i->ftz);
572}
573
574void
575CodeEmitterNVC0::emitFMUL(const Instruction *i)
576{
577   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
578
579   assert(i->postFactor >= -3 && i->postFactor <= 3);
580
581   if (i->encSize == 8) {
582      if (isLIMM(i->src(1), TYPE_F32)) {
583         assert(i->postFactor == 0); // constant folded, hopefully
584         emitForm_A(i, HEX64(30000000, 00000002));
585      } else {
586         emitForm_A(i, HEX64(58000000, 00000000));
587         roundMode_A(i);
588         code[1] |= ((i->postFactor > 0) ?
589                     (7 - i->postFactor) : (0 - i->postFactor)) << 17;
590      }
591      if (neg)
592         code[1] ^= 1 << 25; // aliases with LIMM sign bit
593
594      if (i->saturate)
595         code[0] |= 1 << 5;
596
597      if (i->dnz)
598         code[0] |= 1 << 7;
599      else
600      if (i->ftz)
601         code[0] |= 1 << 6;
602   } else {
603      assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
604      emitForm_S(i, 0xa8, true);
605   }
606}
607
608void
609CodeEmitterNVC0::emitDMUL(const Instruction *i)
610{
611   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
612
613   emitForm_A(i, HEX64(50000000, 00000001));
614   roundMode_A(i);
615
616   if (neg)
617      code[0] |= 1 << 9;
618
619   assert(!i->saturate);
620   assert(!i->ftz);
621   assert(!i->dnz);
622   assert(!i->postFactor);
623}
624
625void
626CodeEmitterNVC0::emitUMUL(const Instruction *i)
627{
628   if (i->encSize == 8) {
629      if (i->src(1).getFile() == FILE_IMMEDIATE) {
630         emitForm_A(i, HEX64(10000000, 00000002));
631      } else {
632         emitForm_A(i, HEX64(50000000, 00000003));
633      }
634      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
635         code[0] |= 1 << 6;
636      if (i->sType == TYPE_S32)
637         code[0] |= 1 << 5;
638      if (i->dType == TYPE_S32)
639         code[0] |= 1 << 7;
640   } else {
641      emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
642
643      if (i->sType == TYPE_S32)
644         code[0] |= 1 << 6;
645   }
646}
647
648void
649CodeEmitterNVC0::emitFADD(const Instruction *i)
650{
651   if (i->encSize == 8) {
652      if (isLIMM(i->src(1), TYPE_F32)) {
653         assert(!i->saturate);
654         emitForm_A(i, HEX64(28000000, 00000002));
655
656         code[0] |= i->src(0).mod.abs() << 7;
657         code[0] |= i->src(0).mod.neg() << 9;
658
659         if (i->src(1).mod.abs())
660            code[1] &= 0xfdffffff;
661         if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
662            code[1] ^= 0x02000000;
663      } else {
664         emitForm_A(i, HEX64(50000000, 00000000));
665
666         roundMode_A(i);
667         if (i->saturate)
668            code[1] |= 1 << 17;
669
670         emitNegAbs12(i);
671         if (i->op == OP_SUB) code[0] ^= 1 << 8;
672      }
673      if (i->ftz)
674         code[0] |= 1 << 5;
675   } else {
676      assert(!i->saturate && i->op != OP_SUB &&
677             !i->src(0).mod.abs() &&
678             !i->src(1).mod.neg() && !i->src(1).mod.abs());
679
680      emitForm_S(i, 0x49, true);
681
682      if (i->src(0).mod.neg())
683         code[0] |= 1 << 7;
684   }
685}
686
687void
688CodeEmitterNVC0::emitDADD(const Instruction *i)
689{
690   assert(i->encSize == 8);
691   emitForm_A(i, HEX64(48000000, 00000001));
692   roundMode_A(i);
693   assert(!i->saturate);
694   assert(!i->ftz);
695   emitNegAbs12(i);
696   if (i->op == OP_SUB)
697      code[0] ^= 1 << 8;
698}
699
700void
701CodeEmitterNVC0::emitUADD(const Instruction *i)
702{
703   uint32_t addOp = 0;
704
705   assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
706
707   if (i->src(0).mod.neg())
708      addOp |= 0x200;
709   if (i->src(1).mod.neg())
710      addOp |= 0x100;
711   if (i->op == OP_SUB)
712      addOp ^= 0x100;
713
714   assert(addOp != 0x300); // would be add-plus-one
715
716   if (i->encSize == 8) {
717      if (isLIMM(i->src(1), TYPE_U32)) {
718         emitForm_A(i, HEX64(08000000, 00000002));
719         if (i->defExists(1))
720            code[1] |= 1 << 26; // write carry
721      } else {
722         emitForm_A(i, HEX64(48000000, 00000003));
723         if (i->defExists(1))
724            code[1] |= 1 << 16; // write carry
725      }
726      code[0] |= addOp;
727
728      if (i->saturate)
729         code[0] |= 1 << 5;
730      if (i->flagsSrc >= 0) // add carry
731         code[0] |= 1 << 6;
732   } else {
733      assert(!(addOp & 0x100));
734      emitForm_S(i, (addOp >> 3) |
735                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
736   }
737}
738
739void
740CodeEmitterNVC0::emitIMAD(const Instruction *i)
741{
742   uint8_t addOp =
743      i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
744
745   assert(i->encSize == 8);
746   emitForm_A(i, HEX64(20000000, 00000003));
747
748   assert(addOp != 3);
749   code[0] |= addOp << 8;
750
751   if (isSignedType(i->dType))
752      code[0] |= 1 << 7;
753   if (isSignedType(i->sType))
754      code[0] |= 1 << 5;
755
756   code[1] |= i->saturate << 24;
757
758   if (i->flagsDef >= 0) code[1] |= 1 << 16;
759   if (i->flagsSrc >= 0) code[1] |= 1 << 23;
760
761   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
762      code[0] |= 1 << 6;
763}
764
765void
766CodeEmitterNVC0::emitSHLADD(const Instruction *i)
767{
768   uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
769   const ImmediateValue *imm = i->src(1).get()->asImm();
770   assert(imm);
771
772   code[0] = 0x00000003;
773   code[1] = 0x40000000 | addOp << 23;
774
775   emitPredicate(i);
776
777   defId(i->def(0), 14);
778   srcId(i->src(0), 20);
779
780   if (i->flagsDef >= 0)
781      code[1] |= 1 << 16;
782
783   assert(!(imm->reg.data.u32 & 0xffffffe0));
784   code[0] |= imm->reg.data.u32 << 5;
785
786   switch (i->src(2).getFile()) {
787   case FILE_GPR:
788      srcId(i->src(2), 26);
789      break;
790   case FILE_MEMORY_CONST:
791      code[1] |= 0x4000;
792      code[1] |= i->getSrc(2)->reg.fileIndex << 10;
793      setAddress16(i->src(2));
794      break;
795   case FILE_IMMEDIATE:
796      setImmediate(i, 2);
797      break;
798   default:
799      assert(!"bad src2 file");
800      break;
801   }
802}
803
804void
805CodeEmitterNVC0::emitMADSP(const Instruction *i)
806{
807   assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
808
809   emitForm_A(i, HEX64(00000000, 00000003));
810
811   if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
812      code[1] |= 0x01800000;
813   } else {
814      code[0] |= (i->subOp & 0x00f) << 7;
815      code[0] |= (i->subOp & 0x0f0) << 1;
816      code[0] |= (i->subOp & 0x100) >> 3;
817      code[0] |= (i->subOp & 0x200) >> 2;
818      code[1] |= (i->subOp & 0xc00) << 13;
819   }
820
821   if (i->flagsDef >= 0)
822      code[1] |= 1 << 16;
823}
824
825void
826CodeEmitterNVC0::emitISAD(const Instruction *i)
827{
828   assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
829   assert(i->encSize == 8);
830
831   emitForm_A(i, HEX64(38000000, 00000003));
832
833   if (i->dType == TYPE_S32)
834      code[0] |= 1 << 5;
835}
836
837void
838CodeEmitterNVC0::emitNOT(Instruction *i)
839{
840   assert(i->encSize == 8);
841   i->setSrc(1, i->src(0));
842   emitForm_A(i, HEX64(68000000, 000001c3));
843}
844
845void
846CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
847{
848   if (i->def(0).getFile() == FILE_PREDICATE) {
849      code[0] = 0x00000004 | (subOp << 30);
850      code[1] = 0x0c000000;
851
852      emitPredicate(i);
853
854      defId(i->def(0), 17);
855      srcId(i->src(0), 20);
856      if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
857      srcId(i->src(1), 26);
858      if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
859
860      if (i->defExists(1)) {
861         defId(i->def(1), 14);
862      } else {
863         code[0] |= 7 << 14;
864      }
865      // (a OP b) OP c
866      if (i->predSrc != 2 && i->srcExists(2)) {
867         code[1] |= subOp << 21;
868         srcId(i->src(2), 49);
869         if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
870      } else {
871         code[1] |= 0x000e0000;
872      }
873   } else
874   if (i->encSize == 8) {
875      if (isLIMM(i->src(1), TYPE_U32)) {
876         emitForm_A(i, HEX64(38000000, 00000002));
877
878         if (i->flagsDef >= 0)
879            code[1] |= 1 << 26;
880      } else {
881         emitForm_A(i, HEX64(68000000, 00000003));
882
883         if (i->flagsDef >= 0)
884            code[1] |= 1 << 16;
885      }
886      code[0] |= subOp << 6;
887
888      if (i->flagsSrc >= 0) // carry
889         code[0] |= 1 << 5;
890
891      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
892      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
893   } else {
894      emitForm_S(i, (subOp << 5) |
895                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
896   }
897}
898
899void
900CodeEmitterNVC0::emitPOPC(const Instruction *i)
901{
902   emitForm_A(i, HEX64(54000000, 00000004));
903
904   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
905   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
906}
907
908void
909CodeEmitterNVC0::emitINSBF(const Instruction *i)
910{
911   emitForm_A(i, HEX64(28000000, 00000003));
912}
913
914void
915CodeEmitterNVC0::emitEXTBF(const Instruction *i)
916{
917   emitForm_A(i, HEX64(70000000, 00000003));
918
919   if (i->dType == TYPE_S32)
920      code[0] |= 1 << 5;
921   if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
922      code[0] |= 1 << 8;
923}
924
925void
926CodeEmitterNVC0::emitBFIND(const Instruction *i)
927{
928   emitForm_B(i, HEX64(78000000, 00000003));
929
930   if (i->dType == TYPE_S32)
931      code[0] |= 1 << 5;
932   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
933      code[0] |= 1 << 8;
934   if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
935      code[0] |= 1 << 6;
936}
937
938void
939CodeEmitterNVC0::emitPERMT(const Instruction *i)
940{
941   emitForm_A(i, HEX64(24000000, 00000004));
942
943   code[0] |= i->subOp << 5;
944}
945
946void
947CodeEmitterNVC0::emitShift(const Instruction *i)
948{
949   if (i->op == OP_SHR) {
950      emitForm_A(i, HEX64(58000000, 00000003)
951                 | (isSignedType(i->dType) ? 0x20 : 0x00));
952   } else {
953      emitForm_A(i, HEX64(60000000, 00000003));
954   }
955
956   if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
957      code[0] |= 1 << 9;
958}
959
960void
961CodeEmitterNVC0::emitPreOp(const Instruction *i)
962{
963   if (i->encSize == 8) {
964      emitForm_B(i, HEX64(60000000, 00000000));
965
966      if (i->op == OP_PREEX2)
967         code[0] |= 0x20;
968
969      if (i->src(0).mod.abs()) code[0] |= 1 << 6;
970      if (i->src(0).mod.neg()) code[0] |= 1 << 8;
971   } else {
972      emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
973   }
974}
975
976void
977CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
978{
979   if (i->encSize == 8) {
980      code[0] = 0x00000000 | (subOp << 26);
981      code[1] = 0xc8000000;
982
983      emitPredicate(i);
984
985      defId(i->def(0), 14);
986      srcId(i->src(0), 20);
987
988      assert(i->src(0).getFile() == FILE_GPR);
989
990      if (i->saturate) code[0] |= 1 << 5;
991
992      if (i->src(0).mod.abs()) code[0] |= 1 << 7;
993      if (i->src(0).mod.neg()) code[0] |= 1 << 9;
994   } else {
995      emitForm_S(i, 0x80000008 | (subOp << 26), true);
996
997      assert(!i->src(0).mod.neg());
998      if (i->src(0).mod.abs()) code[0] |= 1 << 30;
999   }
1000}
1001
1002void
1003CodeEmitterNVC0::emitMINMAX(const Instruction *i)
1004{
1005   uint64_t op;
1006
1007   assert(i->encSize == 8);
1008
1009   op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
1010
1011   if (i->ftz)
1012      op |= 1 << 5;
1013   else
1014   if (!isFloatType(i->dType))
1015      op |= isSignedType(i->dType) ? 0x23 : 0x03;
1016   if (i->dType == TYPE_F64)
1017      op |= 0x01;
1018
1019   emitForm_A(i, op);
1020   emitNegAbs12(i);
1021}
1022
1023void
1024CodeEmitterNVC0::roundMode_C(const Instruction *i)
1025{
1026   switch (i->rnd) {
1027   case ROUND_M:  code[1] |= 1 << 17; break;
1028   case ROUND_P:  code[1] |= 2 << 17; break;
1029   case ROUND_Z:  code[1] |= 3 << 17; break;
1030   case ROUND_NI: code[0] |= 1 << 7; break;
1031   case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
1032   case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
1033   case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
1034   case ROUND_N: break;
1035   default:
1036      assert(!"invalid round mode");
1037      break;
1038   }
1039}
1040
1041void
1042CodeEmitterNVC0::roundMode_CS(const Instruction *i)
1043{
1044   switch (i->rnd) {
1045   case ROUND_M:
1046   case ROUND_MI: code[0] |= 1 << 16; break;
1047   case ROUND_P:
1048   case ROUND_PI: code[0] |= 2 << 16; break;
1049   case ROUND_Z:
1050   case ROUND_ZI: code[0] |= 3 << 16; break;
1051   default:
1052      break;
1053   }
1054}
1055
1056void
1057CodeEmitterNVC0::emitCVT(Instruction *i)
1058{
1059   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1060   DataType dType;
1061
1062   switch (i->op) {
1063   case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
1064   case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
1065   case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1066   default:
1067      break;
1068   }
1069
1070   const bool sat = (i->op == OP_SAT) || i->saturate;
1071   const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1072   const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1073
1074   if (i->op == OP_NEG && i->dType == TYPE_U32)
1075      dType = TYPE_S32;
1076   else
1077      dType = i->dType;
1078
1079   if (i->encSize == 8) {
1080      emitForm_B(i, HEX64(10000000, 00000004));
1081
1082      roundMode_C(i);
1083
1084      // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1085      code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1086      code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1087
1088      // for 8/16 source types, the byte/word is in subOp. word 1 is
1089      // represented as 2.
1090      if (!isFloatType(i->sType))
1091         code[1] |= i->subOp << 0x17;
1092      else
1093         code[1] |= i->subOp << 0x18;
1094
1095      if (sat)
1096         code[0] |= 0x20;
1097      if (abs)
1098         code[0] |= 1 << 6;
1099      if (neg && i->op != OP_ABS)
1100         code[0] |= 1 << 8;
1101
1102      if (i->ftz)
1103         code[1] |= 1 << 23;
1104
1105      if (isSignedIntType(dType))
1106         code[0] |= 0x080;
1107      if (isSignedIntType(i->sType))
1108         code[0] |= 0x200;
1109
1110      if (isFloatType(dType)) {
1111         if (!isFloatType(i->sType))
1112            code[1] |= 0x08000000;
1113      } else {
1114         if (isFloatType(i->sType))
1115            code[1] |= 0x04000000;
1116         else
1117            code[1] |= 0x0c000000;
1118      }
1119   } else {
1120      if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1121         code[0] = 0x298;
1122      } else
1123      if (isFloatType(dType)) {
1124         if (isFloatType(i->sType))
1125            code[0] = 0x098;
1126         else
1127            code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1128      } else {
1129         assert(isFloatType(i->sType));
1130
1131         code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1132      }
1133
1134      if (neg) code[0] |= 1 << 16;
1135      if (sat) code[0] |= 1 << 18;
1136      if (abs) code[0] |= 1 << 19;
1137
1138      roundMode_CS(i);
1139   }
1140}
1141
1142void
1143CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1144{
1145   uint32_t hi;
1146   uint32_t lo = 0;
1147
1148   if (i->sType == TYPE_F64)
1149      lo = 0x1;
1150   else
1151   if (!isFloatType(i->sType))
1152      lo = 0x3;
1153
1154   if (isSignedIntType(i->sType))
1155      lo |= 0x20;
1156   if (isFloatType(i->dType)) {
1157      if (isFloatType(i->sType))
1158         lo |= 0x20;
1159      else
1160         lo |= 0x80;
1161   }
1162
1163   switch (i->op) {
1164   case OP_SET_AND: hi = 0x10000000; break;
1165   case OP_SET_OR:  hi = 0x10200000; break;
1166   case OP_SET_XOR: hi = 0x10400000; break;
1167   default:
1168      hi = 0x100e0000;
1169      break;
1170   }
1171   emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1172
1173   if (i->op != OP_SET)
1174      srcId(i->src(2), 32 + 17);
1175
1176   if (i->def(0).getFile() == FILE_PREDICATE) {
1177      if (i->sType == TYPE_F32)
1178         code[1] += 0x10000000;
1179      else
1180         code[1] += 0x08000000;
1181
1182      code[0] &= ~0xfc000;
1183      defId(i->def(0), 17);
1184      if (i->defExists(1))
1185         defId(i->def(1), 14);
1186      else
1187         code[0] |= 0x1c000;
1188   }
1189
1190   if (i->ftz)
1191      code[1] |= 1 << 27;
1192
1193   emitCondCode(i->setCond, 32 + 23);
1194   emitNegAbs12(i);
1195}
1196
1197void
1198CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1199{
1200   uint64_t op;
1201
1202   switch (i->dType) {
1203   case TYPE_S32:
1204      op = HEX64(30000000, 00000023);
1205      break;
1206   case TYPE_U32:
1207      op = HEX64(30000000, 00000003);
1208      break;
1209   case TYPE_F32:
1210      op = HEX64(38000000, 00000000);
1211      break;
1212   default:
1213      assert(!"invalid type for SLCT");
1214      op = 0;
1215      break;
1216   }
1217   emitForm_A(i, op);
1218
1219   CondCode cc = i->setCond;
1220
1221   if (i->src(2).mod.neg())
1222      cc = reverseCondCode(cc);
1223
1224   emitCondCode(cc, 32 + 23);
1225
1226   if (i->ftz)
1227      code[0] |= 1 << 5;
1228}
1229
1230static void
1231selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1232{
1233   int loc = entry->loc;
1234   if (data.force_persample_interp)
1235      code[loc + 1] |= 1 << 20;
1236   else
1237      code[loc + 1] &= ~(1 << 20);
1238}
1239
1240void CodeEmitterNVC0::emitSELP(const Instruction *i)
1241{
1242   emitForm_A(i, HEX64(20000000, 00000004));
1243
1244   if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1245      code[1] |= 1 << 20;
1246
1247   if (i->subOp == 1) {
1248      addInterp(0, 0, selpFlip);
1249   }
1250}
1251
1252void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1253{
1254   code[0] = 0x00000006 | (i->subOp << 26);
1255   code[1] = 0xf0000000;
1256   emitPredicate(i);
1257   emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1258}
1259
1260void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1261{
1262   code[0] = 0x00000086;
1263   code[1] = 0xd0000000;
1264
1265   code[1] |= i->tex.r;
1266   code[1] |= i->tex.s << 8;
1267
1268   if (i->tex.liveOnly)
1269      code[0] |= 1 << 9;
1270
1271   defId(i->def(0), 14);
1272   srcId(i->src(0), 20);
1273}
1274
1275static inline bool
1276isNextIndependentTex(const TexInstruction *i)
1277{
1278   if (!i->next || !isTextureOp(i->next->op))
1279      return false;
1280   if (i->getDef(0)->interfers(i->next->getSrc(0)))
1281      return false;
1282   return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1283}
1284
1285void
1286CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1287{
1288   code[0] = 0x00000006;
1289
1290   if (isNextIndependentTex(i))
1291      code[0] |= 0x080; // t mode
1292   else
1293      code[0] |= 0x100; // p mode
1294
1295   if (i->tex.liveOnly)
1296      code[0] |= 1 << 9;
1297
1298   switch (i->op) {
1299   case OP_TEX: code[1] = 0x80000000; break;
1300   case OP_TXB: code[1] = 0x84000000; break;
1301   case OP_TXL: code[1] = 0x86000000; break;
1302   case OP_TXF: code[1] = 0x90000000; break;
1303   case OP_TXG: code[1] = 0xa0000000; break;
1304   case OP_TXLQ: code[1] = 0xb0000000; break;
1305   case OP_TXD: code[1] = 0xe0000000; break;
1306   default:
1307      assert(!"invalid texture op");
1308      break;
1309   }
1310   if (i->op == OP_TXF) {
1311      if (!i->tex.levelZero)
1312         code[1] |= 0x02000000;
1313   } else
1314   if (i->tex.levelZero) {
1315      code[1] |= 0x02000000;
1316   }
1317
1318   if (i->op != OP_TXD && i->tex.derivAll)
1319      code[1] |= 1 << 13;
1320
1321   defId(i->def(0), 14);
1322   srcId(i->src(0), 20);
1323
1324   emitPredicate(i);
1325
1326   if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1327
1328   code[1] |= i->tex.mask << 14;
1329
1330   code[1] |= i->tex.r;
1331   code[1] |= i->tex.s << 8;
1332   if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1333      code[1] |= 1 << 18; // in 1st source (with array index)
1334
1335   // texture target:
1336   code[1] |= (i->tex.target.getDim() - 1) << 20;
1337   if (i->tex.target.isCube())
1338      code[1] += 2 << 20;
1339   if (i->tex.target.isArray())
1340      code[1] |= 1 << 19;
1341   if (i->tex.target.isShadow())
1342      code[1] |= 1 << 24;
1343
1344   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1345
1346   if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1347      // lzero
1348      if (i->op == OP_TXL)
1349         code[1] &= ~(1 << 26);
1350      else
1351      if (i->op == OP_TXF)
1352         code[1] &= ~(1 << 25);
1353   }
1354   if (i->tex.target == TEX_TARGET_2D_MS ||
1355       i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1356      code[1] |= 1 << 23;
1357
1358   if (i->tex.useOffsets == 1)
1359      code[1] |= 1 << 22;
1360   if (i->tex.useOffsets == 4)
1361      code[1] |= 1 << 23;
1362
1363   srcId(i, src1, 26);
1364}
1365
1366void
1367CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1368{
1369   code[0] = 0x00000086;
1370   code[1] = 0xc0000000;
1371
1372   switch (i->tex.query) {
1373   case TXQ_DIMS:            code[1] |= 0 << 22; break;
1374   case TXQ_TYPE:            code[1] |= 1 << 22; break;
1375   case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1376   case TXQ_FILTER:          code[1] |= 3 << 22; break;
1377   case TXQ_LOD:             code[1] |= 4 << 22; break;
1378   case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
1379   default:
1380      assert(!"invalid texture query");
1381      break;
1382   }
1383
1384   code[1] |= i->tex.mask << 14;
1385
1386   code[1] |= i->tex.r;
1387   code[1] |= i->tex.s << 8;
1388   if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1389      code[1] |= 1 << 18;
1390
1391   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1392
1393   defId(i->def(0), 14);
1394   srcId(i->src(0), 20);
1395   srcId(i, src1, 26);
1396
1397   emitPredicate(i);
1398}
1399
1400void
1401CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1402{
1403   code[0] = 0x00000200 | (laneMask << 6); // dall
1404   code[1] = 0x48000000 | qOp;
1405
1406   defId(i->def(0), 14);
1407   srcId(i->src(0), 20);
1408   srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
1409
1410   emitPredicate(i);
1411}
1412
1413void
1414CodeEmitterNVC0::emitFlow(const Instruction *i)
1415{
1416   const FlowInstruction *f = i->asFlow();
1417
1418   unsigned mask; // bit 0: predicate, bit 1: target
1419
1420   code[0] = 0x00000007;
1421
1422   switch (i->op) {
1423   case OP_BRA:
1424      code[1] = f->absolute ? 0x00000000 : 0x40000000;
1425      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1426         code[0] |= 0x4000;
1427      mask = 3;
1428      break;
1429   case OP_CALL:
1430      code[1] = f->absolute ? 0x10000000 : 0x50000000;
1431      if (f->indirect)
1432         code[0] |= 0x4000; // indirect calls always use c[] source
1433      mask = 2;
1434      break;
1435
1436   case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
1437   case OP_RET:     code[1] = 0x90000000; mask = 1; break;
1438   case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1439   case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
1440   case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
1441
1442   case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
1443   case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1444   case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
1445   case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
1446
1447   case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
1448   case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1449   case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
1450   default:
1451      assert(!"invalid flow operation");
1452      return;
1453   }
1454
1455   if (mask & 1) {
1456      emitPredicate(i);
1457      if (i->flagsSrc < 0)
1458         code[0] |= 0x1e0;
1459   }
1460
1461   if (!f)
1462      return;
1463
1464   if (f->allWarp)
1465      code[0] |= 1 << 15;
1466   if (f->limit)
1467      code[0] |= 1 << 16;
1468
1469   if (f->indirect) {
1470      if (code[0] & 0x4000) {
1471         assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1472         setAddress16(i->src(0));
1473         code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1474         if (f->op == OP_BRA)
1475            srcId(f->src(0).getIndirect(0), 20);
1476      } else {
1477         srcId(f, 0, 20);
1478      }
1479   }
1480
1481   if (f->op == OP_CALL) {
1482      if (f->indirect) {
1483         // nothing
1484      } else
1485      if (f->builtin) {
1486         assert(f->absolute);
1487         uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1488         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1489         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1490      } else {
1491         assert(!f->absolute);
1492         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1493         code[0] |= (pcRel & 0x3f) << 26;
1494         code[1] |= (pcRel >> 6) & 0x3ffff;
1495      }
1496   } else
1497   if (mask & 2) {
1498      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1499      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1500         pcRel += 8;
1501      // currently we don't want absolute branches
1502      assert(!f->absolute);
1503      code[0] |= (pcRel & 0x3f) << 26;
1504      code[1] |= (pcRel >> 6) & 0x3ffff;
1505   }
1506}
1507
1508void
1509CodeEmitterNVC0::emitBAR(const Instruction *i)
1510{
1511   Value *rDef = NULL, *pDef = NULL;
1512
1513   switch (i->subOp) {
1514   case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
1515   case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
1516   case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
1517   case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1518   default:
1519      code[0] = 0x04;
1520      assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1521      break;
1522   }
1523   code[1] = 0x50000000;
1524
1525   code[0] |= 63 << 14;
1526   code[1] |= 7 << 21;
1527
1528   emitPredicate(i);
1529
1530   // barrier id
1531   if (i->src(0).getFile() == FILE_GPR) {
1532      srcId(i->src(0), 20);
1533   } else {
1534      ImmediateValue *imm = i->getSrc(0)->asImm();
1535      assert(imm);
1536      code[0] |= imm->reg.data.u32 << 20;
1537      code[1] |= 0x8000;
1538   }
1539
1540   // thread count
1541   if (i->src(1).getFile() == FILE_GPR) {
1542      srcId(i->src(1), 26);
1543   } else {
1544      ImmediateValue *imm = i->getSrc(1)->asImm();
1545      assert(imm);
1546      assert(imm->reg.data.u32 <= 0xfff);
1547      code[0] |= imm->reg.data.u32 << 26;
1548      code[1] |= imm->reg.data.u32 >> 6;
1549      code[1] |= 0x4000;
1550   }
1551
1552   if (i->srcExists(2) && (i->predSrc != 2)) {
1553      srcId(i->src(2), 32 + 17);
1554      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1555         code[1] |= 1 << 20;
1556   } else {
1557      code[1] |= 7 << 17;
1558   }
1559
1560   if (i->defExists(0)) {
1561      if (i->def(0).getFile() == FILE_GPR)
1562         rDef = i->getDef(0);
1563      else
1564         pDef = i->getDef(0);
1565
1566      if (i->defExists(1)) {
1567         if (i->def(1).getFile() == FILE_GPR)
1568            rDef = i->getDef(1);
1569         else
1570            pDef = i->getDef(1);
1571      }
1572   }
1573   if (rDef) {
1574      code[0] &= ~(63 << 14);
1575      defId(rDef, 14);
1576   }
1577   if (pDef) {
1578      code[1] &= ~(7 << 21);
1579      defId(pDef, 32 + 21);
1580   }
1581}
1582
1583void
1584CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1585{
1586   code[0] = 0x00000006;
1587   code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1588
1589   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1590      code[0] |= 0x200;
1591
1592   emitPredicate(i);
1593
1594   defId(i->def(0), 14);
1595   srcId(i->src(0).getIndirect(0), 20);
1596}
1597
1598void
1599CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1600{
1601   uint32_t prim = i->src(0).get()->reg.data.u32;
1602
1603   code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1604   code[1] = 0x00000000 | (prim >> 6);
1605
1606   emitPredicate(i);
1607
1608   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1609
1610   defId(i->def(0), 14);
1611   srcId(i, src1, 20);
1612}
1613
1614void
1615CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1616{
1617   code[0] = 0x00000006;
1618   code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1619
1620   if (i->perPatch)
1621      code[0] |= 0x100;
1622   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1623      code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1624
1625   emitPredicate(i);
1626
1627   code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1628
1629   defId(i->def(0), 14);
1630   srcId(i->src(0).getIndirect(0), 20);
1631   srcId(i->src(0).getIndirect(1), 26); // vertex address
1632}
1633
1634void
1635CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1636{
1637   unsigned int size = typeSizeof(i->dType);
1638
1639   code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1640   code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1641
1642   assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1643
1644   if (i->perPatch)
1645      code[0] |= 0x100;
1646
1647   emitPredicate(i);
1648
1649   assert(i->src(1).getFile() == FILE_GPR);
1650
1651   srcId(i->src(0).getIndirect(0), 20);
1652   srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1653   srcId(i->src(1), 26);
1654}
1655
1656void
1657CodeEmitterNVC0::emitOUT(const Instruction *i)
1658{
1659   code[0] = 0x00000006;
1660   code[1] = 0x1c000000;
1661
1662   emitPredicate(i);
1663
1664   defId(i->def(0), 14); // new secret address
1665   srcId(i->src(0), 20); // old secret address, should be 0 initially
1666
1667   assert(i->src(0).getFile() == FILE_GPR);
1668
1669   if (i->op == OP_EMIT)
1670      code[0] |= 1 << 5;
1671   if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1672      code[0] |= 1 << 6;
1673
1674   // vertex stream
1675   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1676      unsigned int stream = SDATA(i->src(1)).u32;
1677      assert(stream < 4);
1678      if (stream) {
1679         code[1] |= 0xc000;
1680         code[0] |= stream << 26;
1681      } else {
1682         srcId(NULL, 26);
1683      }
1684   } else {
1685      srcId(i->src(1), 26);
1686   }
1687}
1688
1689void
1690CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1691{
1692   if (i->encSize == 8) {
1693      code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1694   } else {
1695      if (i->getInterpMode() == NV50_IR_INTERP_SC)
1696         code[0] |= 0x80;
1697      assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1698   }
1699}
1700
1701static void
1702interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1703{
1704   int ipa = entry->ipa;
1705   int reg = entry->reg;
1706   int loc = entry->loc;
1707
1708   if (data.flatshade &&
1709       (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1710      ipa = NV50_IR_INTERP_FLAT;
1711      reg = 0x3f;
1712   } else if (data.force_persample_interp &&
1713              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1714              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1715      ipa |= NV50_IR_INTERP_CENTROID;
1716   }
1717   code[loc + 0] &= ~(0xf << 6);
1718   code[loc + 0] |= ipa << 6;
1719   code[loc + 0] &= ~(0x3f << 26);
1720   code[loc + 0] |= reg << 26;
1721}
1722
1723void
1724CodeEmitterNVC0::emitINTERP(const Instruction *i)
1725{
1726   const uint32_t base = i->getSrc(0)->reg.data.offset;
1727
1728   if (i->encSize == 8) {
1729      code[0] = 0x00000000;
1730      code[1] = 0xc0000000 | (base & 0xffff);
1731
1732      if (i->saturate)
1733         code[0] |= 1 << 5;
1734
1735      if (i->op == OP_PINTERP) {
1736         srcId(i->src(1), 26);
1737         addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
1738      } else {
1739         code[0] |= 0x3f << 26;
1740         addInterp(i->ipa, 0x3f, interpApply);
1741      }
1742
1743      srcId(i->src(0).getIndirect(0), 20);
1744   } else {
1745      assert(i->op == OP_PINTERP);
1746      code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1747      srcId(i->src(1), 20);
1748   }
1749   emitInterpMode(i);
1750
1751   emitPredicate(i);
1752   defId(i->def(0), 14);
1753
1754   if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1755      srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1756   else
1757      code[1] |= 0x3f << 17;
1758}
1759
1760void
1761CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1762{
1763   uint8_t val;
1764
1765   switch (ty) {
1766   case TYPE_U8:
1767      val = 0x00;
1768      break;
1769   case TYPE_S8:
1770      val = 0x20;
1771      break;
1772   case TYPE_F16:
1773   case TYPE_U16:
1774      val = 0x40;
1775      break;
1776   case TYPE_S16:
1777      val = 0x60;
1778      break;
1779   case TYPE_F32:
1780   case TYPE_U32:
1781   case TYPE_S32:
1782      val = 0x80;
1783      break;
1784   case TYPE_F64:
1785   case TYPE_U64:
1786   case TYPE_S64:
1787      val = 0xa0;
1788      break;
1789   case TYPE_B128:
1790      val = 0xc0;
1791      break;
1792   default:
1793      val = 0x80;
1794      assert(!"invalid type");
1795      break;
1796   }
1797   code[0] |= val;
1798}
1799
1800void
1801CodeEmitterNVC0::emitCachingMode(CacheMode c)
1802{
1803   uint32_t val;
1804
1805   switch (c) {
1806   case CACHE_CA:
1807// case CACHE_WB:
1808      val = 0x000;
1809      break;
1810   case CACHE_CG:
1811      val = 0x100;
1812      break;
1813   case CACHE_CS:
1814      val = 0x200;
1815      break;
1816   case CACHE_CV:
1817// case CACHE_WT:
1818      val = 0x300;
1819      break;
1820   default:
1821      val = 0;
1822      assert(!"invalid caching mode");
1823      break;
1824   }
1825   code[0] |= val;
1826}
1827
1828static inline bool
1829uses64bitAddress(const Instruction *ldst)
1830{
1831   return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1832      ldst->src(0).isIndirect(0) &&
1833      ldst->getIndirect(0, 0)->reg.size == 8;
1834}
1835
1836void
1837CodeEmitterNVC0::emitSTORE(const Instruction *i)
1838{
1839   uint32_t opc;
1840
1841   switch (i->src(0).getFile()) {
1842   case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1843   case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
1844   case FILE_MEMORY_SHARED:
1845      if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1846         if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1847            opc = 0xb8000000;
1848         else
1849            opc = 0xcc000000;
1850      } else {
1851         opc = 0xc9000000;
1852      }
1853      break;
1854   default:
1855      assert(!"invalid memory file");
1856      opc = 0;
1857      break;
1858   }
1859   code[0] = 0x00000005;
1860   code[1] = opc;
1861
1862   if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
1863      // Unlocked store on shared memory can fail.
1864      if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
1865          i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1866         assert(i->defExists(0));
1867         defId(i->def(0), 8);
1868      }
1869   }
1870
1871   setAddressByFile(i->src(0));
1872   srcId(i->src(1), 14);
1873   srcId(i->src(0).getIndirect(0), 20);
1874   if (uses64bitAddress(i))
1875      code[1] |= 1 << 26;
1876
1877   emitPredicate(i);
1878
1879   emitLoadStoreType(i->dType);
1880   emitCachingMode(i->cache);
1881}
1882
1883void
1884CodeEmitterNVC0::emitLOAD(const Instruction *i)
1885{
1886   uint32_t opc;
1887
1888   code[0] = 0x00000005;
1889
1890   switch (i->src(0).getFile()) {
1891   case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1892   case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
1893   case FILE_MEMORY_SHARED:
1894      if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1895         if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1896            opc = 0xa8000000;
1897         else
1898            opc = 0xc4000000;
1899      } else {
1900         opc = 0xc1000000;
1901      }
1902      break;
1903   case FILE_MEMORY_CONST:
1904      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1905         emitMOV(i); // not sure if this is any better
1906         return;
1907      }
1908      opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1909      code[0] = 0x00000006 | (i->subOp << 8);
1910      break;
1911   default:
1912      assert(!"invalid memory file");
1913      opc = 0;
1914      break;
1915   }
1916   code[1] = opc;
1917
1918   int r = 0, p = -1;
1919   if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
1920      if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1921         if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1922            r = -1;
1923            p = 0;
1924         } else if (i->defExists(1)) { // r, p
1925            p = 1;
1926         } else {
1927            assert(!"Expected predicate dest for load locked");
1928         }
1929      }
1930   }
1931
1932   if (r >= 0)
1933      defId(i->def(r), 14);
1934   else
1935      code[0] |= 63 << 14;
1936
1937   if (p >= 0) {
1938      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1939         defId(i->def(p), 8);
1940      else
1941         defId(i->def(p), 32 + 18);
1942   }
1943
1944   setAddressByFile(i->src(0));
1945   srcId(i->src(0).getIndirect(0), 20);
1946   if (uses64bitAddress(i))
1947      code[1] |= 1 << 26;
1948
1949   emitPredicate(i);
1950
1951   emitLoadStoreType(i->dType);
1952   emitCachingMode(i->cache);
1953}
1954
1955uint8_t
1956CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1957{
1958   switch (SDATA(ref).sv.sv) {
1959   case SV_LANEID:        return 0x00;
1960   case SV_PHYSID:        return 0x03;
1961   case SV_VERTEX_COUNT:  return 0x10;
1962   case SV_INVOCATION_ID: return 0x11;
1963   case SV_YDIR:          return 0x12;
1964   case SV_THREAD_KILL:   return 0x13;
1965   case SV_TID:           return 0x21 + SDATA(ref).sv.index;
1966   case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
1967   case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
1968   case SV_GRIDID:        return 0x2c;
1969   case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
1970   case SV_LBASE:         return 0x34;
1971   case SV_SBASE:         return 0x30;
1972   case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
1973   default:
1974      assert(!"no sreg for system value");
1975      return 0;
1976   }
1977}
1978
1979void
1980CodeEmitterNVC0::emitMOV(const Instruction *i)
1981{
1982   if (i->def(0).getFile() == FILE_PREDICATE) {
1983      if (i->src(0).getFile() == FILE_GPR) {
1984         code[0] = 0xfc01c003;
1985         code[1] = 0x1a8e0000;
1986         srcId(i->src(0), 20);
1987      } else {
1988         code[0] = 0x0001c004;
1989         code[1] = 0x0c0e0000;
1990         if (i->src(0).getFile() == FILE_IMMEDIATE) {
1991            code[0] |= 7 << 20;
1992            if (!i->getSrc(0)->reg.data.u32)
1993               code[0] |= 1 << 23;
1994         } else {
1995            srcId(i->src(0), 20);
1996         }
1997      }
1998      defId(i->def(0), 17);
1999      emitPredicate(i);
2000   } else
2001   if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2002      uint8_t sr = getSRegEncoding(i->src(0));
2003
2004      if (i->encSize == 8) {
2005         code[0] = 0x00000004 | (sr << 26);
2006         code[1] = 0x2c000000;
2007      } else {
2008         code[0] = 0x40000008 | (sr << 20);
2009      }
2010      defId(i->def(0), 14);
2011
2012      emitPredicate(i);
2013   } else
2014   if (i->encSize == 8) {
2015      uint64_t opc;
2016
2017      if (i->src(0).getFile() == FILE_IMMEDIATE)
2018         opc = HEX64(18000000, 000001e2);
2019      else
2020      if (i->src(0).getFile() == FILE_PREDICATE)
2021         opc = HEX64(080e0000, 1c000004);
2022      else
2023         opc = HEX64(28000000, 00000004);
2024
2025      if (i->src(0).getFile() != FILE_PREDICATE)
2026         opc |= i->lanes << 5;
2027
2028      emitForm_B(i, opc);
2029
2030      // Explicitly emit the predicate source as emitForm_B skips it.
2031      if (i->src(0).getFile() == FILE_PREDICATE)
2032         srcId(i->src(0), 20);
2033   } else {
2034      uint32_t imm;
2035
2036      if (i->src(0).getFile() == FILE_IMMEDIATE) {
2037         imm = SDATA(i->src(0)).u32;
2038         if (imm & 0xfff00000) {
2039            assert(!(imm & 0x000fffff));
2040            code[0] = 0x00000318 | imm;
2041         } else {
2042            assert(imm < 0x800 || ((int32_t)imm >= -0x800));
2043            code[0] = 0x00000118 | (imm << 20);
2044         }
2045      } else {
2046         code[0] = 0x0028;
2047         emitShortSrc2(i->src(0));
2048      }
2049      defId(i->def(0), 14);
2050
2051      emitPredicate(i);
2052   }
2053}
2054
2055void
2056CodeEmitterNVC0::emitATOM(const Instruction *i)
2057{
2058   const bool hasDst = i->defExists(0);
2059   const bool casOrExch =
2060      i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
2061      i->subOp == NV50_IR_SUBOP_ATOM_CAS;
2062
2063   if (i->dType == TYPE_U64) {
2064      switch (i->subOp) {
2065      case NV50_IR_SUBOP_ATOM_ADD:
2066         code[0] = 0x205;
2067         if (hasDst)
2068            code[1] = 0x507e0000;
2069         else
2070            code[1] = 0x10000000;
2071         break;
2072      case NV50_IR_SUBOP_ATOM_EXCH:
2073         code[0] = 0x305;
2074         code[1] = 0x507e0000;
2075         break;
2076      case NV50_IR_SUBOP_ATOM_CAS:
2077         code[0] = 0x325;
2078         code[1] = 0x50000000;
2079         break;
2080      default:
2081         assert(!"invalid u64 red op");
2082         break;
2083      }
2084   } else
2085   if (i->dType == TYPE_U32) {
2086      switch (i->subOp) {
2087      case NV50_IR_SUBOP_ATOM_EXCH:
2088         code[0] = 0x105;
2089         code[1] = 0x507e0000;
2090         break;
2091      case NV50_IR_SUBOP_ATOM_CAS:
2092         code[0] = 0x125;
2093         code[1] = 0x50000000;
2094         break;
2095      default:
2096         code[0] = 0x5 | (i->subOp << 5);
2097         if (hasDst)
2098            code[1] = 0x507e0000;
2099         else
2100            code[1] = 0x10000000;
2101         break;
2102      }
2103   } else
2104   if (i->dType == TYPE_S32) {
2105      assert(i->subOp <= 2);
2106      code[0] = 0x205 | (i->subOp << 5);
2107      if (hasDst)
2108         code[1] = 0x587e0000;
2109      else
2110         code[1] = 0x18000000;
2111   } else
2112   if (i->dType == TYPE_F32) {
2113      assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
2114      code[0] = 0x205;
2115      if (hasDst)
2116         code[1] = 0x687e0000;
2117      else
2118         code[1] = 0x28000000;
2119   }
2120
2121   emitPredicate(i);
2122
2123   srcId(i->src(1), 14);
2124
2125   if (hasDst)
2126      defId(i->def(0), 32 + 11);
2127   else
2128   if (casOrExch)
2129      code[1] |= 63 << 11;
2130
2131   if (hasDst || casOrExch) {
2132      const int32_t offset = SDATA(i->src(0)).offset;
2133      assert(offset < 0x80000 && offset >= -0x80000);
2134      code[0] |= offset << 26;
2135      code[1] |= (offset & 0x1ffc0) >> 6;
2136      code[1] |= (offset & 0xe0000) << 6;
2137   } else {
2138      srcAddr32(i->src(0), 26, 0);
2139   }
2140   if (i->getIndirect(0, 0)) {
2141      srcId(i->getIndirect(0, 0), 20);
2142      if (i->getIndirect(0, 0)->reg.size == 8)
2143         code[1] |= 1 << 26;
2144   } else {
2145      code[0] |= 63 << 20;
2146   }
2147
2148   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2149      assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
2150      code[1] |= (SDATA(i->src(1)).id + 1) << 17;
2151   }
2152}
2153
2154void
2155CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
2156{
2157   switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
2158   case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
2159   case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
2160   default:
2161      code[0] = 0x45;
2162      assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
2163      break;
2164   }
2165   code[1] = 0xe0000000;
2166
2167   emitPredicate(i);
2168}
2169
2170void
2171CodeEmitterNVC0::emitCCTL(const Instruction *i)
2172{
2173   code[0] = 0x00000005 | (i->subOp << 5);
2174
2175   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2176      code[1] = 0x98000000;
2177      srcAddr32(i->src(0), 28, 2);
2178   } else {
2179      code[1] = 0xd0000000;
2180      setAddress24(i->src(0));
2181   }
2182   if (uses64bitAddress(i))
2183      code[1] |= 1 << 26;
2184   srcId(i->src(0).getIndirect(0), 20);
2185
2186   emitPredicate(i);
2187
2188   defId(i, 0, 14);
2189}
2190
2191void
2192CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2193{
2194   uint8_t m;
2195   switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2196   case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2197   case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2198   case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2199   case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2200   case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2201   case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2202   case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2203   case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2204   case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2205   case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2206   case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2207   case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2208   case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2209   case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2210   case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2211   default:
2212      return;
2213   }
2214   code[0] |= m << 5;
2215   if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2216      code[1] |= 1 << 16;
2217}
2218
2219void
2220CodeEmitterNVC0::emitSUCalc(Instruction *i)
2221{
2222   ImmediateValue *imm = NULL;
2223   uint64_t opc;
2224
2225   if (i->srcExists(2)) {
2226      imm = i->getSrc(2)->asImm();
2227      if (imm)
2228         i->setSrc(2, NULL); // special case, make emitForm_A not assert
2229   }
2230
2231   switch (i->op) {
2232   case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2233   case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2234   case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2235   default:
2236      assert(0);
2237      return;
2238   }
2239   emitForm_A(i, opc);
2240
2241   if (i->op == OP_SUCLAMP) {
2242      if (i->dType == TYPE_S32)
2243         code[0] |= 1 << 9;
2244      emitSUCLAMPMode(i->subOp);
2245   }
2246
2247   if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2248         code[1] |= 1 << 16;
2249
2250   if (i->op != OP_SUEAU) {
2251      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2252         code[0] |= 63 << 14;
2253         code[1] |= i->getDef(0)->reg.data.id << 23;
2254      } else
2255      if (i->defExists(1)) { // r, p
2256         assert(i->def(1).getFile() == FILE_PREDICATE);
2257         code[1] |= i->getDef(1)->reg.data.id << 23;
2258      } else { // r, #
2259         code[1] |= 7 << 23;
2260      }
2261   }
2262   if (imm) {
2263      assert(i->op == OP_SUCLAMP);
2264      i->setSrc(2, imm);
2265      code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2266   }
2267}
2268
2269void
2270CodeEmitterNVC0::emitSUGType(DataType ty)
2271{
2272   switch (ty) {
2273   case TYPE_S32: code[1] |= 1 << 13; break;
2274   case TYPE_U8:  code[1] |= 2 << 13; break;
2275   case TYPE_S8:  code[1] |= 3 << 13; break;
2276   default:
2277      assert(ty == TYPE_U32);
2278      break;
2279   }
2280}
2281
2282void
2283CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2284{
2285   const uint32_t offset = i->getSrc(s)->reg.data.offset;
2286
2287   assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2288   assert(offset == (offset & 0xfffc));
2289
2290   code[1] |= 1 << 21;
2291   code[0] |= offset << 24;
2292   code[1] |= offset >> 8;
2293   code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2294}
2295
2296void
2297CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2298{
2299   if (!i->srcExists(s) || (i->predSrc == s)) {
2300      code[1] |= 0x7 << 17;
2301   } else {
2302      if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2303         code[1] |= 1 << 20;
2304      srcId(i->src(s), 32 + 17);
2305   }
2306}
2307
2308void
2309CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2310{
2311   code[0] = 0x5;
2312   code[1] = 0xd4000000 | (i->subOp << 15);
2313
2314   emitLoadStoreType(i->dType);
2315   emitSUGType(i->sType);
2316   emitCachingMode(i->cache);
2317
2318   emitPredicate(i);
2319   defId(i->def(0), 14); // destination
2320   srcId(i->src(0), 20); // address
2321   // format
2322   if (i->src(1).getFile() == FILE_GPR)
2323      srcId(i->src(1), 26);
2324   else
2325      setSUConst16(i, 1);
2326   setSUPred(i, 2);
2327}
2328
2329void
2330CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2331{
2332   code[0] = 0x5;
2333   code[1] = 0xdc000000 | (i->subOp << 15);
2334
2335   if (i->op == OP_SUSTP)
2336      code[1] |= i->tex.mask << 22;
2337   else
2338      emitLoadStoreType(i->dType);
2339   emitSUGType(i->sType);
2340   emitCachingMode(i->cache);
2341
2342   emitPredicate(i);
2343   srcId(i->src(0), 20); // address
2344   // format
2345   if (i->src(1).getFile() == FILE_GPR)
2346      srcId(i->src(1), 26);
2347   else
2348      setSUConst16(i, 1);
2349   srcId(i->src(3), 14); // values
2350   setSUPred(i, 2);
2351}
2352
2353void
2354CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
2355{
2356   assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2357
2358   if (i->tex.rIndirectSrc < 0) {
2359      code[1] |= 0x00004000;
2360      code[0] |= i->tex.r << 26;
2361   } else {
2362      srcId(i, i->tex.rIndirectSrc, 26);
2363   }
2364}
2365
2366void
2367CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
2368{
2369   assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2370
2371   code[1] |= (i->tex.target.getDim() - 1) << 12;
2372   if (i->tex.target.isArray() || i->tex.target.isCube() ||
2373       i->tex.target.getDim() == 3) {
2374      // use e2d mode for 3-dim images, arrays and cubes.
2375      code[1] |= 3 << 12;
2376   }
2377
2378   srcId(i->src(0), 20);
2379}
2380
2381void
2382CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
2383{
2384   assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2385
2386   code[0] = 0x5;
2387   code[1] = 0xf0000000;
2388
2389   emitPredicate(i);
2390   emitLoadStoreType(i->sType);
2391
2392   defId(i->def(0), 14);
2393
2394   if (i->defExists(1)) {
2395      defId(i->def(1), 32 + 22);
2396   } else {
2397      code[1] |= 7 << 22;
2398   }
2399
2400   emitSUAddr(i);
2401   emitSUDim(i);
2402}
2403
2404void
2405CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
2406{
2407   assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2408
2409   code[0] = 0x5;
2410   code[1] = 0xd4000000 | (i->subOp << 15);
2411
2412   emitPredicate(i);
2413   emitLoadStoreType(i->dType);
2414
2415   defId(i->def(0), 14);
2416
2417   emitCachingMode(i->cache);
2418   emitSUAddr(i);
2419   emitSUDim(i);
2420}
2421
2422void
2423CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
2424{
2425   assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2426
2427   code[0] = 0x5;
2428   code[1] = 0xdc000000 | (i->subOp << 15);
2429
2430   if (i->op == OP_SUSTP)
2431      code[1] |= i->tex.mask << 17;
2432   else
2433      emitLoadStoreType(i->dType);
2434
2435   emitPredicate(i);
2436
2437   srcId(i->src(1), 14);
2438
2439   emitCachingMode(i->cache);
2440   emitSUAddr(i);
2441   emitSUDim(i);
2442}
2443
2444void
2445CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2446{
2447   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2448   case 0:
2449      code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2450      code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
2451      code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
2452      code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2453      break;
2454   case 1:
2455      code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
2456      code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2457      code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
2458      code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
2459      code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
2460      code[1] |= (i->mask & 0x3) << 2;
2461      break;
2462   case 2:
2463      code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2464      code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2465      code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2466      code[1] |= (i->mask & 0x3) << 2;
2467      code[1] |= (i->mask & 0xc) << 21;
2468      break;
2469   default:
2470      assert(0);
2471      break;
2472   }
2473}
2474
2475void
2476CodeEmitterNVC0::emitVSHL(const Instruction *i)
2477{
2478   uint64_t opc = 0x4;
2479
2480   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2481   case 0: opc |= 0xe8ULL << 56; break;
2482   case 1: opc |= 0xb4ULL << 56; break;
2483   case 2: opc |= 0x94ULL << 56; break;
2484   default:
2485      assert(0);
2486      break;
2487   }
2488   if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2489      if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2490      if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2491   } else {
2492      if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2493      if (isSignedType(i->sType)) opc |= 1 << 6;
2494   }
2495   emitForm_A(i, opc);
2496   emitVectorSubOp(i);
2497
2498   if (i->saturate)
2499      code[0] |= 1 << 9;
2500   if (i->flagsDef >= 0)
2501      code[1] |= 1 << 16;
2502}
2503
2504void
2505CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2506{
2507   assert(i->encSize == 8);
2508   emitForm_A(i, HEX64(10000000, 00000006));
2509   code[0] |= i->subOp << 5;
2510   code[1] |= 0x00e00000;
2511}
2512
2513void
2514CodeEmitterNVC0::emitVOTE(const Instruction *i)
2515{
2516   assert(i->src(0).getFile() == FILE_PREDICATE);
2517
2518   code[0] = 0x00000004 | (i->subOp << 5);
2519   code[1] = 0x48000000;
2520
2521   emitPredicate(i);
2522
2523   unsigned rp = 0;
2524   for (int d = 0; i->defExists(d); d++) {
2525      if (i->def(d).getFile() == FILE_PREDICATE) {
2526         assert(!(rp & 2));
2527         rp |= 2;
2528         defId(i->def(d), 32 + 22);
2529      } else if (i->def(d).getFile() == FILE_GPR) {
2530         assert(!(rp & 1));
2531         rp |= 1;
2532         defId(i->def(d), 14);
2533      } else {
2534         assert(!"Unhandled def");
2535      }
2536   }
2537   if (!(rp & 1))
2538      code[0] |= 63 << 14;
2539   if (!(rp & 2))
2540      code[1] |= 7 << 22;
2541   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
2542      code[0] |= 1 << 23;
2543   srcId(i->src(0), 20);
2544}
2545
2546bool
2547CodeEmitterNVC0::emitInstruction(Instruction *insn)
2548{
2549   unsigned int size = insn->encSize;
2550
2551   if (writeIssueDelays && !(codeSize & 0x3f))
2552      size += 8;
2553
2554   if (!insn->encSize) {
2555      ERROR("skipping unencodable instruction: "); insn->print();
2556      return false;
2557   } else
2558   if (codeSize + size > codeSizeLimit) {
2559      ERROR("code emitter output buffer too small\n");
2560      return false;
2561   }
2562
2563   if (writeIssueDelays) {
2564      if (!(codeSize & 0x3f)) {
2565         code[0] = 0x00000007; // cf issue delay "instruction"
2566         code[1] = 0x20000000;
2567         code += 2;
2568         codeSize += 8;
2569      }
2570      const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2571      uint32_t *data = code - (id * 2 + 2);
2572      if (id <= 2) {
2573         data[0] |= insn->sched << (id * 8 + 4);
2574      } else
2575      if (id == 3) {
2576         data[0] |= insn->sched << 28;
2577         data[1] |= insn->sched >> 4;
2578      } else {
2579         data[1] |= insn->sched << ((id - 4) * 8 + 4);
2580      }
2581   }
2582
2583   // assert that instructions with multiple defs don't corrupt registers
2584   for (int d = 0; insn->defExists(d); ++d)
2585      assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2586
2587   switch (insn->op) {
2588   case OP_MOV:
2589   case OP_RDSV:
2590      emitMOV(insn);
2591      break;
2592   case OP_NOP:
2593      break;
2594   case OP_LOAD:
2595      emitLOAD(insn);
2596      break;
2597   case OP_STORE:
2598      emitSTORE(insn);
2599      break;
2600   case OP_LINTERP:
2601   case OP_PINTERP:
2602      emitINTERP(insn);
2603      break;
2604   case OP_VFETCH:
2605      emitVFETCH(insn);
2606      break;
2607   case OP_EXPORT:
2608      emitEXPORT(insn);
2609      break;
2610   case OP_PFETCH:
2611      emitPFETCH(insn);
2612      break;
2613   case OP_AFETCH:
2614      emitAFETCH(insn);
2615      break;
2616   case OP_EMIT:
2617   case OP_RESTART:
2618      emitOUT(insn);
2619      break;
2620   case OP_ADD:
2621   case OP_SUB:
2622      if (insn->dType == TYPE_F64)
2623         emitDADD(insn);
2624      else if (isFloatType(insn->dType))
2625         emitFADD(insn);
2626      else
2627         emitUADD(insn);
2628      break;
2629   case OP_MUL:
2630      if (insn->dType == TYPE_F64)
2631         emitDMUL(insn);
2632      else if (isFloatType(insn->dType))
2633         emitFMUL(insn);
2634      else
2635         emitUMUL(insn);
2636      break;
2637   case OP_MAD:
2638   case OP_FMA:
2639      if (insn->dType == TYPE_F64)
2640         emitDMAD(insn);
2641      else if (isFloatType(insn->dType))
2642         emitFMAD(insn);
2643      else
2644         emitIMAD(insn);
2645      break;
2646   case OP_SAD:
2647      emitISAD(insn);
2648      break;
2649   case OP_SHLADD:
2650      emitSHLADD(insn);
2651      break;
2652   case OP_NOT:
2653      emitNOT(insn);
2654      break;
2655   case OP_AND:
2656      emitLogicOp(insn, 0);
2657      break;
2658   case OP_OR:
2659      emitLogicOp(insn, 1);
2660      break;
2661   case OP_XOR:
2662      emitLogicOp(insn, 2);
2663      break;
2664   case OP_SHL:
2665   case OP_SHR:
2666      emitShift(insn);
2667      break;
2668   case OP_SET:
2669   case OP_SET_AND:
2670   case OP_SET_OR:
2671   case OP_SET_XOR:
2672      emitSET(insn->asCmp());
2673      break;
2674   case OP_SELP:
2675      emitSELP(insn);
2676      break;
2677   case OP_SLCT:
2678      emitSLCT(insn->asCmp());
2679      break;
2680   case OP_MIN:
2681   case OP_MAX:
2682      emitMINMAX(insn);
2683      break;
2684   case OP_ABS:
2685   case OP_NEG:
2686   case OP_CEIL:
2687   case OP_FLOOR:
2688   case OP_TRUNC:
2689   case OP_SAT:
2690      emitCVT(insn);
2691      break;
2692   case OP_CVT:
2693      if (insn->def(0).getFile() == FILE_PREDICATE ||
2694          insn->src(0).getFile() == FILE_PREDICATE)
2695         emitMOV(insn);
2696      else
2697         emitCVT(insn);
2698      break;
2699   case OP_RSQ:
2700      emitSFnOp(insn, 5 + 2 * insn->subOp);
2701      break;
2702   case OP_RCP:
2703      emitSFnOp(insn, 4 + 2 * insn->subOp);
2704      break;
2705   case OP_LG2:
2706      emitSFnOp(insn, 3);
2707      break;
2708   case OP_EX2:
2709      emitSFnOp(insn, 2);
2710      break;
2711   case OP_SIN:
2712      emitSFnOp(insn, 1);
2713      break;
2714   case OP_COS:
2715      emitSFnOp(insn, 0);
2716      break;
2717   case OP_PRESIN:
2718   case OP_PREEX2:
2719      emitPreOp(insn);
2720      break;
2721   case OP_TEX:
2722   case OP_TXB:
2723   case OP_TXL:
2724   case OP_TXD:
2725   case OP_TXF:
2726   case OP_TXG:
2727   case OP_TXLQ:
2728      emitTEX(insn->asTex());
2729      break;
2730   case OP_TXQ:
2731      emitTXQ(insn->asTex());
2732      break;
2733   case OP_TEXBAR:
2734      emitTEXBAR(insn);
2735      break;
2736   case OP_SUBFM:
2737   case OP_SUCLAMP:
2738   case OP_SUEAU:
2739      emitSUCalc(insn);
2740      break;
2741   case OP_MADSP:
2742      emitMADSP(insn);
2743      break;
2744   case OP_SULDB:
2745      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2746         emitSULDGB(insn->asTex());
2747      else
2748         emitSULDB(insn->asTex());
2749      break;
2750   case OP_SUSTB:
2751   case OP_SUSTP:
2752      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2753         emitSUSTGx(insn->asTex());
2754      else
2755         emitSUSTx(insn->asTex());
2756      break;
2757   case OP_SULEA:
2758      emitSULEA(insn->asTex());
2759      break;
2760   case OP_ATOM:
2761      emitATOM(insn);
2762      break;
2763   case OP_BRA:
2764   case OP_CALL:
2765   case OP_PRERET:
2766   case OP_RET:
2767   case OP_DISCARD:
2768   case OP_EXIT:
2769   case OP_PRECONT:
2770   case OP_CONT:
2771   case OP_PREBREAK:
2772   case OP_BREAK:
2773   case OP_JOINAT:
2774   case OP_BRKPT:
2775   case OP_QUADON:
2776   case OP_QUADPOP:
2777      emitFlow(insn);
2778      break;
2779   case OP_QUADOP:
2780      emitQUADOP(insn, insn->subOp, insn->lanes);
2781      break;
2782   case OP_DFDX:
2783      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2784      break;
2785   case OP_DFDY:
2786      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2787      break;
2788   case OP_POPCNT:
2789      emitPOPC(insn);
2790      break;
2791   case OP_INSBF:
2792      emitINSBF(insn);
2793      break;
2794   case OP_EXTBF:
2795      emitEXTBF(insn);
2796      break;
2797   case OP_BFIND:
2798      emitBFIND(insn);
2799      break;
2800   case OP_PERMT:
2801      emitPERMT(insn);
2802      break;
2803   case OP_JOIN:
2804      emitNOP(insn);
2805      insn->join = 1;
2806      break;
2807   case OP_BAR:
2808      emitBAR(insn);
2809      break;
2810   case OP_MEMBAR:
2811      emitMEMBAR(insn);
2812      break;
2813   case OP_CCTL:
2814      emitCCTL(insn);
2815      break;
2816   case OP_VSHL:
2817      emitVSHL(insn);
2818      break;
2819   case OP_PIXLD:
2820      emitPIXLD(insn);
2821      break;
2822   case OP_VOTE:
2823      emitVOTE(insn);
2824      break;
2825   case OP_PHI:
2826   case OP_UNION:
2827   case OP_CONSTRAINT:
2828      ERROR("operation should have been eliminated");
2829      return false;
2830   case OP_EXP:
2831   case OP_LOG:
2832   case OP_SQRT:
2833   case OP_POW:
2834      ERROR("operation should have been lowered\n");
2835      return false;
2836   default:
2837      ERROR("unknown op: %u\n", insn->op);
2838      return false;
2839   }
2840
2841   if (insn->join) {
2842      code[0] |= 0x10;
2843      assert(insn->encSize == 8);
2844   }
2845
2846   code += insn->encSize / 4;
2847   codeSize += insn->encSize;
2848   return true;
2849}
2850
2851uint32_t
2852CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2853{
2854   const Target::OpInfo &info = targ->getOpInfo(i);
2855
2856   if (writeIssueDelays || info.minEncSize == 8 || 1)
2857      return 8;
2858
2859   if (i->ftz || i->saturate || i->join)
2860      return 8;
2861   if (i->rnd != ROUND_N)
2862      return 8;
2863   if (i->predSrc >= 0 && i->op == OP_MAD)
2864      return 8;
2865
2866   if (i->op == OP_PINTERP) {
2867      if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2868         return 8;
2869   } else
2870   if (i->op == OP_MOV && i->lanes != 0xf) {
2871      return 8;
2872   }
2873
2874   for (int s = 0; i->srcExists(s); ++s) {
2875      if (i->src(s).isIndirect(0))
2876         return 8;
2877
2878      if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2879         if (SDATA(i->src(s)).offset >= 0x100)
2880            return 8;
2881         if (i->getSrc(s)->reg.fileIndex > 1 &&
2882             i->getSrc(s)->reg.fileIndex != 16)
2883             return 8;
2884      } else
2885      if (i->src(s).getFile() == FILE_IMMEDIATE) {
2886         if (i->dType == TYPE_F32) {
2887            if (SDATA(i->src(s)).u32 >= 0x100)
2888               return 8;
2889         } else {
2890            if (SDATA(i->src(s)).u32 > 0xff)
2891               return 8;
2892         }
2893      }
2894
2895      if (i->op == OP_CVT)
2896         continue;
2897      if (i->src(s).mod != Modifier(0)) {
2898         if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2899            if (i->op != OP_RSQ)
2900               return 8;
2901         if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2902            if (i->op != OP_ADD || s != 0)
2903               return 8;
2904      }
2905   }
2906
2907   return 4;
2908}
2909
2910// Simplified, erring on safe side.
2911class SchedDataCalculator : public Pass
2912{
2913public:
2914   SchedDataCalculator(const Target *targ) : targ(targ) { }
2915
2916private:
2917   struct RegScores
2918   {
2919      struct Resource {
2920         int st[DATA_FILE_COUNT]; // LD to LD delay 3
2921         int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2922         int tex; // TEX to non-TEX delay 17 (0x11)
2923         int sfu; // SFU to SFU delay 3 (except PRE-ops)
2924         int imul; // integer MUL to MUL delay 3
2925      } res;
2926      struct ScoreData {
2927         int r[256];
2928         int p[8];
2929         int c;
2930      } rd, wr;
2931      int base;
2932      int regs;
2933
2934      void rebase(const int base)
2935      {
2936         const int delta = this->base - base;
2937         if (!delta)
2938            return;
2939         this->base = 0;
2940
2941         for (int i = 0; i < regs; ++i) {
2942            rd.r[i] += delta;
2943            wr.r[i] += delta;
2944         }
2945         for (int i = 0; i < 8; ++i) {
2946            rd.p[i] += delta;
2947            wr.p[i] += delta;
2948         }
2949         rd.c += delta;
2950         wr.c += delta;
2951
2952         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2953            res.ld[f] += delta;
2954            res.st[f] += delta;
2955         }
2956         res.sfu += delta;
2957         res.imul += delta;
2958         res.tex += delta;
2959      }
2960      void wipe(int regs)
2961      {
2962         memset(&rd, 0, sizeof(rd));
2963         memset(&wr, 0, sizeof(wr));
2964         memset(&res, 0, sizeof(res));
2965         this->regs = regs;
2966      }
2967      int getLatest(const ScoreData& d) const
2968      {
2969         int max = 0;
2970         for (int i = 0; i < regs; ++i)
2971            if (d.r[i] > max)
2972               max = d.r[i];
2973         for (int i = 0; i < 8; ++i)
2974            if (d.p[i] > max)
2975               max = d.p[i];
2976         if (d.c > max)
2977            max = d.c;
2978         return max;
2979      }
2980      inline int getLatestRd() const
2981      {
2982         return getLatest(rd);
2983      }
2984      inline int getLatestWr() const
2985      {
2986         return getLatest(wr);
2987      }
2988      inline int getLatest() const
2989      {
2990         const int a = getLatestRd();
2991         const int b = getLatestWr();
2992
2993         int max = MAX2(a, b);
2994         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2995            max = MAX2(res.ld[f], max);
2996            max = MAX2(res.st[f], max);
2997         }
2998         max = MAX2(res.sfu, max);
2999         max = MAX2(res.imul, max);
3000         max = MAX2(res.tex, max);
3001         return max;
3002      }
3003      void setMax(const RegScores *that)
3004      {
3005         for (int i = 0; i < regs; ++i) {
3006            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3007            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3008         }
3009         for (int i = 0; i < 8; ++i) {
3010            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3011            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3012         }
3013         rd.c = MAX2(rd.c, that->rd.c);
3014         wr.c = MAX2(wr.c, that->wr.c);
3015
3016         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3017            res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
3018            res.st[f] = MAX2(res.st[f], that->res.st[f]);
3019         }
3020         res.sfu = MAX2(res.sfu, that->res.sfu);
3021         res.imul = MAX2(res.imul, that->res.imul);
3022         res.tex = MAX2(res.tex, that->res.tex);
3023      }
3024      void print(int cycle)
3025      {
3026         for (int i = 0; i < regs; ++i) {
3027            if (rd.r[i] > cycle)
3028               INFO("rd $r%i @ %i\n", i, rd.r[i]);
3029            if (wr.r[i] > cycle)
3030               INFO("wr $r%i @ %i\n", i, wr.r[i]);
3031         }
3032         for (int i = 0; i < 8; ++i) {
3033            if (rd.p[i] > cycle)
3034               INFO("rd $p%i @ %i\n", i, rd.p[i]);
3035            if (wr.p[i] > cycle)
3036               INFO("wr $p%i @ %i\n", i, wr.p[i]);
3037         }
3038         if (rd.c > cycle)
3039            INFO("rd $c @ %i\n", rd.c);
3040         if (wr.c > cycle)
3041            INFO("wr $c @ %i\n", wr.c);
3042         if (res.sfu > cycle)
3043            INFO("sfu @ %i\n", res.sfu);
3044         if (res.imul > cycle)
3045            INFO("imul @ %i\n", res.imul);
3046         if (res.tex > cycle)
3047            INFO("tex @ %i\n", res.tex);
3048      }
3049   };
3050
3051   RegScores *score; // for current BB
3052   std::vector<RegScores> scoreBoards;
3053   int prevData;
3054   operation prevOp;
3055
3056   const Target *targ;
3057
3058   bool visit(Function *);
3059   bool visit(BasicBlock *);
3060
3061   void commitInsn(const Instruction *, int cycle);
3062   int calcDelay(const Instruction *, int cycle) const;
3063   void setDelay(Instruction *, int delay, Instruction *next);
3064
3065   void recordRd(const Value *, const int ready);
3066   void recordWr(const Value *, const int ready);
3067   void checkRd(const Value *, int cycle, int& delay) const;
3068   void checkWr(const Value *, int cycle, int& delay) const;
3069
3070   int getCycles(const Instruction *, int origDelay) const;
3071};
3072
3073void
3074SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
3075{
3076   if (insn->op == OP_EXIT || insn->op == OP_RET)
3077      delay = MAX2(delay, 14);
3078
3079   if (insn->op == OP_TEXBAR) {
3080      // TODO: except if results not used before EXIT
3081      insn->sched = 0xc2;
3082   } else
3083   if (insn->op == OP_JOIN || insn->join) {
3084      insn->sched = 0x00;
3085   } else
3086   if (delay >= 0 || prevData == 0x04 ||
3087       !next || !targ->canDualIssue(insn, next)) {
3088      insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
3089      if (prevOp == OP_EXPORT)
3090         insn->sched |= 0x40;
3091      else
3092         insn->sched |= 0x20;
3093   } else {
3094      insn->sched = 0x04; // dual-issue
3095   }
3096
3097   if (prevData != 0x04 || prevOp != OP_EXPORT)
3098      if (insn->sched != 0x04 || insn->op == OP_EXPORT)
3099         prevOp = insn->op;
3100
3101   prevData = insn->sched;
3102}
3103
3104int
3105SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
3106{
3107   if (insn->sched & 0x80) {
3108      int c = (insn->sched & 0x0f) * 2 + 1;
3109      if (insn->op == OP_TEXBAR && origDelay > 0)
3110         c += origDelay;
3111      return c;
3112   }
3113   if (insn->sched & 0x60)
3114      return (insn->sched & 0x1f) + 1;
3115   return (insn->sched == 0x04) ? 0 : 32;
3116}
3117
3118bool
3119SchedDataCalculator::visit(Function *func)
3120{
3121   int regs = targ->getFileSize(FILE_GPR) + 1;
3122   scoreBoards.resize(func->cfg.getSize());
3123   for (size_t i = 0; i < scoreBoards.size(); ++i)
3124      scoreBoards[i].wipe(regs);
3125   return true;
3126}
3127
3128bool
3129SchedDataCalculator::visit(BasicBlock *bb)
3130{
3131   Instruction *insn;
3132   Instruction *next = NULL;
3133
3134   int cycle = 0;
3135
3136   prevData = 0x00;
3137   prevOp = OP_NOP;
3138   score = &scoreBoards.at(bb->getId());
3139
3140   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
3141      // back branches will wait until all target dependencies are satisfied
3142      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
3143         continue;
3144      BasicBlock *in = BasicBlock::get(ei.getNode());
3145      if (in->getExit()) {
3146         if (prevData != 0x04)
3147            prevData = in->getExit()->sched;
3148         prevOp = in->getExit()->op;
3149      }
3150      score->setMax(&scoreBoards.at(in->getId()));
3151   }
3152   if (bb->cfg.incidentCount() > 1)
3153      prevOp = OP_NOP;
3154
3155#ifdef NVC0_DEBUG_SCHED_DATA
3156   INFO("=== BB:%i initial scores\n", bb->getId());
3157   score->print(cycle);
3158#endif
3159
3160   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
3161      next = insn->next;
3162
3163      commitInsn(insn, cycle);
3164      int delay = calcDelay(next, cycle);
3165      setDelay(insn, delay, next);
3166      cycle += getCycles(insn, delay);
3167
3168#ifdef NVC0_DEBUG_SCHED_DATA
3169      INFO("cycle %i, sched %02x\n", cycle, insn->sched);
3170      insn->print();
3171      next->print();
3172#endif
3173   }
3174   if (!insn)
3175      return true;
3176   commitInsn(insn, cycle);
3177
3178   int bbDelay = -1;
3179
3180   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
3181      BasicBlock *out = BasicBlock::get(ei.getNode());
3182
3183      if (ei.getType() != Graph::Edge::BACK) {
3184         // only test the first instruction of the outgoing block
3185         next = out->getEntry();
3186         if (next)
3187            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
3188      } else {
3189         // wait until all dependencies are satisfied
3190         const int regsFree = score->getLatest();
3191         next = out->getFirst();
3192         for (int c = cycle; next && c < regsFree; next = next->next) {
3193            bbDelay = MAX2(bbDelay, calcDelay(next, c));
3194            c += getCycles(next, bbDelay);
3195         }
3196         next = NULL;
3197      }
3198   }
3199   if (bb->cfg.outgoingCount() != 1)
3200      next = NULL;
3201   setDelay(insn, bbDelay, next);
3202   cycle += getCycles(insn, bbDelay);
3203
3204   score->rebase(cycle); // common base for initializing out blocks' scores
3205   return true;
3206}
3207
3208#define NVE4_MAX_ISSUE_DELAY 0x1f
3209int
3210SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
3211{
3212   int delay = 0, ready = cycle;
3213
3214   for (int s = 0; insn->srcExists(s); ++s)
3215      checkRd(insn->getSrc(s), cycle, delay);
3216   // WAR & WAW don't seem to matter
3217   // for (int s = 0; insn->srcExists(s); ++s)
3218   //   recordRd(insn->getSrc(s), cycle);
3219
3220   switch (Target::getOpClass(insn->op)) {
3221   case OPCLASS_SFU:
3222      ready = score->res.sfu;
3223      break;
3224   case OPCLASS_ARITH:
3225      if (insn->op == OP_MUL && !isFloatType(insn->dType))
3226         ready = score->res.imul;
3227      break;
3228   case OPCLASS_TEXTURE:
3229      ready = score->res.tex;
3230      break;
3231   case OPCLASS_LOAD:
3232      ready = score->res.ld[insn->src(0).getFile()];
3233      break;
3234   case OPCLASS_STORE:
3235      ready = score->res.st[insn->src(0).getFile()];
3236      break;
3237   default:
3238      break;
3239   }
3240   if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
3241      ready = MAX2(ready, score->res.tex);
3242
3243   delay = MAX2(delay, ready - cycle);
3244
3245   // if can issue next cycle, delay is 0, not 1
3246   return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
3247}
3248
3249void
3250SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
3251{
3252   const int ready = cycle + targ->getLatency(insn);
3253
3254   for (int d = 0; insn->defExists(d); ++d)
3255      recordWr(insn->getDef(d), ready);
3256   // WAR & WAW don't seem to matter
3257   // for (int s = 0; insn->srcExists(s); ++s)
3258   //   recordRd(insn->getSrc(s), cycle);
3259
3260   switch (Target::getOpClass(insn->op)) {
3261   case OPCLASS_SFU:
3262      score->res.sfu = cycle + 4;
3263      break;
3264   case OPCLASS_ARITH:
3265      if (insn->op == OP_MUL && !isFloatType(insn->dType))
3266         score->res.imul = cycle + 4;
3267      break;
3268   case OPCLASS_TEXTURE:
3269      score->res.tex = cycle + 18;
3270      break;
3271   case OPCLASS_LOAD:
3272      if (insn->src(0).getFile() == FILE_MEMORY_CONST)
3273         break;
3274      score->res.ld[insn->src(0).getFile()] = cycle + 4;
3275      score->res.st[insn->src(0).getFile()] = ready;
3276      break;
3277   case OPCLASS_STORE:
3278      score->res.st[insn->src(0).getFile()] = cycle + 4;
3279      score->res.ld[insn->src(0).getFile()] = ready;
3280      break;
3281   case OPCLASS_OTHER:
3282      if (insn->op == OP_TEXBAR)
3283         score->res.tex = cycle;
3284      break;
3285   default:
3286      break;
3287   }
3288
3289#ifdef NVC0_DEBUG_SCHED_DATA
3290   score->print(cycle);
3291#endif
3292}
3293
3294void
3295SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
3296{
3297   int ready = cycle;
3298   int a, b;
3299
3300   switch (v->reg.file) {
3301   case FILE_GPR:
3302      a = v->reg.data.id;
3303      b = a + v->reg.size / 4;
3304      for (int r = a; r < b; ++r)
3305         ready = MAX2(ready, score->rd.r[r]);
3306      break;
3307   case FILE_PREDICATE:
3308      ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3309      break;
3310   case FILE_FLAGS:
3311      ready = MAX2(ready, score->rd.c);
3312      break;
3313   case FILE_SHADER_INPUT:
3314   case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3315   case FILE_MEMORY_LOCAL:
3316   case FILE_MEMORY_CONST:
3317   case FILE_MEMORY_SHARED:
3318   case FILE_MEMORY_GLOBAL:
3319   case FILE_SYSTEM_VALUE:
3320      // TODO: any restrictions here ?
3321      break;
3322   case FILE_IMMEDIATE:
3323      break;
3324   default:
3325      assert(0);
3326      break;
3327   }
3328   if (cycle < ready)
3329      delay = MAX2(delay, ready - cycle);
3330}
3331
3332void
3333SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3334{
3335   int ready = cycle;
3336   int a, b;
3337
3338   switch (v->reg.file) {
3339   case FILE_GPR:
3340      a = v->reg.data.id;
3341      b = a + v->reg.size / 4;
3342      for (int r = a; r < b; ++r)
3343         ready = MAX2(ready, score->wr.r[r]);
3344      break;
3345   case FILE_PREDICATE:
3346      ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3347      break;
3348   default:
3349      assert(v->reg.file == FILE_FLAGS);
3350      ready = MAX2(ready, score->wr.c);
3351      break;
3352   }
3353   if (cycle < ready)
3354      delay = MAX2(delay, ready - cycle);
3355}
3356
3357void
3358SchedDataCalculator::recordWr(const Value *v, const int ready)
3359{
3360   int a = v->reg.data.id;
3361
3362   if (v->reg.file == FILE_GPR) {
3363      int b = a + v->reg.size / 4;
3364      for (int r = a; r < b; ++r)
3365         score->rd.r[r] = ready;
3366   } else
3367   // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3368   if (v->reg.file == FILE_PREDICATE) {
3369      score->rd.p[a] = ready + 4;
3370   } else {
3371      assert(v->reg.file == FILE_FLAGS);
3372      score->rd.c = ready + 4;
3373   }
3374}
3375
3376void
3377SchedDataCalculator::recordRd(const Value *v, const int ready)
3378{
3379   int a = v->reg.data.id;
3380
3381   if (v->reg.file == FILE_GPR) {
3382      int b = a + v->reg.size / 4;
3383      for (int r = a; r < b; ++r)
3384         score->wr.r[r] = ready;
3385   } else
3386   if (v->reg.file == FILE_PREDICATE) {
3387      score->wr.p[a] = ready;
3388   } else
3389   if (v->reg.file == FILE_FLAGS) {
3390      score->wr.c = ready;
3391   }
3392}
3393
3394bool
3395calculateSchedDataNVC0(const Target *targ, Function *func)
3396{
3397   SchedDataCalculator sched(targ);
3398   return sched.run(func, true, true);
3399}
3400
3401void
3402CodeEmitterNVC0::prepareEmission(Function *func)
3403{
3404   CodeEmitter::prepareEmission(func);
3405
3406   if (targ->hasSWSched)
3407      calculateSchedDataNVC0(targ, func);
3408}
3409
3410CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
3411   : CodeEmitter(target),
3412     targNVC0(target),
3413     writeIssueDelays(target->hasSWSched)
3414{
3415   code = NULL;
3416   codeSize = codeSizeLimit = 0;
3417   relocInfo = NULL;
3418}
3419
3420CodeEmitter *
3421TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3422{
3423   CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
3424   emit->setProgramType(type);
3425   return emit;
3426}
3427
3428CodeEmitter *
3429TargetNVC0::getCodeEmitter(Program::Type type)
3430{
3431   if (chipset >= NVISA_GK20A_CHIPSET)
3432      return createCodeEmitterGK110(type);
3433   return createCodeEmitterNVC0(type);
3434}
3435
3436} // namespace nv50_ir
3437