1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "nv50_ir_target_nvc0.h"
24
25namespace nv50_ir {
26
27// Argh, all these assertions ...
28
// Code emitter for the NVC0 target. Derives from CodeEmitter and is
// constructed from a TargetNVC0; the emit* helpers below each write the
// encoding of one kind of instruction into code[].
class CodeEmitterNVC0 : public CodeEmitter
{
public:
   CodeEmitterNVC0(const TargetNVC0 *);

   // CodeEmitter interface
   virtual bool emitInstruction(Instruction *);
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
   virtual void prepareEmission(Function *);

   // Records the program type; read by emitQUADOP.
   inline void setProgramType(Program::Type pType) { progType = pType; }

private:
   const TargetNVC0 *targ;

   Program::Type progType;

   const bool writeIssueDelays;

private:
   // Generic encoding forms: A and B take a 64-bit opcode template, S takes a
   // 32-bit one (short encoding).
   void emitForm_A(const Instruction *, uint64_t);
   void emitForm_B(const Instruction *, uint64_t);
   void emitForm_S(const Instruction *, uint32_t, bool pred);

   void emitPredicate(const Instruction *);

   // Operand field helpers
   void setAddress16(const ValueRef&);
   void setImmediate(const Instruction *, const int s); // needs op already set
   void setImmediateS8(const ValueRef&);

   void emitCondCode(CondCode cc, int pos);
   void emitInterpMode(const Instruction *);
   void emitLoadStoreType(DataType ty);
   void emitCachingMode(CacheMode c);

   void emitShortSrc2(const ValueRef&);

   inline uint8_t getSRegEncoding(const ValueRef&);

   // Rounding mode encoders; each variant writes a different bit field.
   void roundMode_A(const Instruction *);
   void roundMode_C(const Instruction *);
   void roundMode_CS(const Instruction *);

   // Writes the abs/neg modifier bits of sources 0 and 1.
   void emitNegAbs12(const Instruction *);

   void emitNOP(const Instruction *);

   void emitLOAD(const Instruction *);
   void emitSTORE(const Instruction *);
   void emitMOV(const Instruction *);

   void emitINTERP(const Instruction *);
   void emitPFETCH(const Instruction *);
   void emitVFETCH(const Instruction *);
   void emitEXPORT(const Instruction *);
   void emitOUT(const Instruction *);

   // Arithmetic
   void emitUADD(const Instruction *);
   void emitFADD(const Instruction *);
   void emitUMUL(const Instruction *);
   void emitFMUL(const Instruction *);
   void emitIMAD(const Instruction *);
   void emitISAD(const Instruction *);
   void emitFMAD(const Instruction *);

   // Bitwise / shift. emitNOT takes a non-const instruction because it
   // rewrites source 1 before encoding.
   void emitNOT(Instruction *);
   void emitLogicOp(const Instruction *, uint8_t subOp);
   void emitPOPC(const Instruction *);
   void emitINSBF(const Instruction *);
   void emitShift(const Instruction *);

   void emitSFnOp(const Instruction *, uint8_t subOp);

   // emitCVT takes a non-const instruction because it may overwrite the
   // instruction's rounding mode before encoding.
   void emitCVT(Instruction *);
   void emitMINMAX(const Instruction *);
   void emitPreOp(const Instruction *);

   // Comparison / selection
   void emitSET(const CmpInstruction *);
   void emitSLCT(const CmpInstruction *);
   void emitSELP(const Instruction *);

   // Texturing
   void emitTEXBAR(const Instruction *);
   void emitTEX(const TexInstruction *);
   void emitTEXCSAA(const TexInstruction *);
   void emitTXQ(const TexInstruction *);
   void emitPIXLD(const TexInstruction *);

   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);

   void emitFlow(const Instruction *);

   // Register id encoders: OR the id of a def/src into code[] at bit `pos`
   // (word pos / 32, shift pos % 32).
   inline void defId(const ValueDef&, const int pos);
   inline void srcId(const ValueRef&, const int pos);
   inline void srcId(const ValueRef *, const int pos);
   inline void srcId(const Instruction *, int s, const int pos);

   inline void srcAddr32(const ValueRef&, const int pos); // address / 4

   // True if the reference is an immediate that has bits set outside the
   // range checked for the given type.
   inline bool isLIMM(const ValueRef&, DataType ty);
};
128
// for better visibility
// HEX64(h, l) token-pastes two groups of hex digits into one unsigned
// 64-bit literal, i.e. HEX64(12345678, 9abcdef0) -> 0x123456789abcdef0ULL.
#define HEX64(h, l) 0x##h##l##ULL

// Both expand to the reg.data of the operand's rep(); in this file SDATA is
// applied to sources (ValueRef) and DDATA to definitions (ValueDef).
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
134
135void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
136{
137   code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
138}
139
140void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
141{
142   code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
143}
144
145void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
146{
147   int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
148   code[pos / 32] |= r << (pos % 32);
149}
150
151void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
152{
153   code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
154}
155
156void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
157{
158   code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
159}
160
161bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
162{
163   const ImmediateValue *imm = ref.get()->asImm();
164
165   return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
166}
167
168void
169CodeEmitterNVC0::roundMode_A(const Instruction *insn)
170{
171   switch (insn->rnd) {
172   case ROUND_M: code[1] |= 1 << 23; break;
173   case ROUND_P: code[1] |= 2 << 23; break;
174   case ROUND_Z: code[1] |= 3 << 23; break;
175   default:
176      assert(insn->rnd == ROUND_N);
177      break;
178   }
179}
180
181void
182CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
183{
184   if (i->src(1).mod.abs()) code[0] |= 1 << 6;
185   if (i->src(0).mod.abs()) code[0] |= 1 << 7;
186   if (i->src(1).mod.neg()) code[0] |= 1 << 8;
187   if (i->src(0).mod.neg()) code[0] |= 1 << 9;
188}
189
190void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
191{
192   uint8_t val;
193
194   switch (cc) {
195   case CC_LT:  val = 0x1; break;
196   case CC_LTU: val = 0x9; break;
197   case CC_EQ:  val = 0x2; break;
198   case CC_EQU: val = 0xa; break;
199   case CC_LE:  val = 0x3; break;
200   case CC_LEU: val = 0xb; break;
201   case CC_GT:  val = 0x4; break;
202   case CC_GTU: val = 0xc; break;
203   case CC_NE:  val = 0x5; break;
204   case CC_NEU: val = 0xd; break;
205   case CC_GE:  val = 0x6; break;
206   case CC_GEU: val = 0xe; break;
207   case CC_TR:  val = 0xf; break;
208   case CC_FL:  val = 0x0; break;
209
210   case CC_A:  val = 0x14; break;
211   case CC_NA: val = 0x13; break;
212   case CC_S:  val = 0x15; break;
213   case CC_NS: val = 0x12; break;
214   case CC_C:  val = 0x16; break;
215   case CC_NC: val = 0x11; break;
216   case CC_O:  val = 0x17; break;
217   case CC_NO: val = 0x10; break;
218
219   default:
220      val = 0;
221      assert(!"invalid condition code");
222      break;
223   }
224   code[pos / 32] |= val << (pos % 32);
225}
226
227void
228CodeEmitterNVC0::emitPredicate(const Instruction *i)
229{
230   if (i->predSrc >= 0) {
231      assert(i->getPredicate()->reg.file == FILE_PREDICATE);
232      srcId(i->src(i->predSrc), 10);
233      if (i->cc == CC_NOT_P)
234         code[0] |= 0x2000; // negate
235   } else {
236      code[0] |= 0x1c00;
237   }
238}
239
240void
241CodeEmitterNVC0::setAddress16(const ValueRef& src)
242{
243   Symbol *sym = src.get()->asSym();
244
245   assert(sym);
246
247   code[0] |= (sym->reg.data.offset & 0x003f) << 26;
248   code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
249}
250
251void
252CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
253{
254   const ImmediateValue *imm = i->src(s).get()->asImm();
255   uint32_t u32;
256
257   assert(imm);
258   u32 = imm->reg.data.u32;
259
260   if ((code[0] & 0xf) == 0x2) {
261      // LIMM
262      code[0] |= (u32 & 0x3f) << 26;
263      code[1] |= u32 >> 6;
264   } else
265   if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
266      // integer immediate
267      assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
268      assert(!(code[1] & 0xc000));
269      u32 &= 0xfffff;
270      code[0] |= (u32 & 0x3f) << 26;
271      code[1] |= 0xc000 | (u32 >> 6);
272   } else {
273      // float immediate
274      assert(!(u32 & 0x00000fff));
275      assert(!(code[1] & 0xc000));
276      code[0] |= ((u32 >> 12) & 0x3f) << 26;
277      code[1] |= 0xc000 | (u32 >> 18);
278   }
279}
280
281void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
282{
283   const ImmediateValue *imm = ref.get()->asImm();
284
285   int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
286
287   assert(s8 == imm->reg.data.s32);
288
289   code[0] |= (s8 & 0x3f) << 26;
290   code[0] |= (s8 >> 6) << 8;
291}
292
// Emit the common long (64-bit) encoding for up to three sources.
// `opc` is the opcode template: its low word initialises code[0], its high
// word code[1]. The predicate, def 0 (at bit 14) and each existing source
// are then OR'ed in according to the source's register file.
void
CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
{
   code[0] = opc;
   code[1] = opc >> 32;

   emitPredicate(i);

   defId(i->def(0), 14);

   // Bit position for a GPR source 1: normally 26, but 49 when source 2 is
   // in constant memory.
   int s1 = 26;
   if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
      s1 = 49;

   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
      switch (i->getSrc(s)->reg.file) {
      case FILE_MEMORY_CONST:
         // code[1] bits 14..15 must still be clear: only one const/immediate
         // operand can be encoded.
         assert(!(code[1] & 0xc000));
         code[1] |= (s == 2) ? 0x8000 : 0x4000;
         code[1] |= i->getSrc(s)->reg.fileIndex << 10;
         setAddress16(i->src(s));
         break;
      case FILE_IMMEDIATE:
         // immediates are only expected as source 1, except for the listed ops
         assert(s == 1 ||
                i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
         assert(!(code[1] & 0xc000));
         setImmediate(i, s);
         break;
      case FILE_GPR:
         if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
            break;
         // source 0 -> bit 20, source 1 -> s1, source 2 -> bit 49
         srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
         break;
      default:
         // ignore here, can be predicate or flags, but must not be address
         break;
      }
   }
}
332
333void
334CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
335{
336   code[0] = opc;
337   code[1] = opc >> 32;
338
339   emitPredicate(i);
340
341   defId(i->def(0), 14);
342
343   switch (i->src(0).getFile()) {
344   case FILE_MEMORY_CONST:
345      assert(!(code[1] & 0xc000));
346      code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
347      setAddress16(i->src(0));
348      break;
349   case FILE_IMMEDIATE:
350      assert(!(code[1] & 0xc000));
351      setImmediate(i, 0);
352      break;
353   case FILE_GPR:
354      srcId(i->src(0), 26);
355      break;
356   default:
357      // ignore here, can be predicate or flags, but must not be address
358      break;
359   }
360}
361
362void
363CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
364{
365   code[0] = opc;
366
367   int ss2a = 0;
368   if (opc == 0x0d || opc == 0x0e)
369      ss2a = 2;
370
371   defId(i->def(0), 14);
372   srcId(i->src(0), 20);
373
374   assert(pred || (i->predSrc < 0));
375   if (pred)
376      emitPredicate(i);
377
378   for (int s = 1; s < 3 && i->srcExists(s); ++s) {
379      if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
380         assert(!(code[0] & (0x300 >> ss2a)));
381         switch (i->src(s).get()->reg.fileIndex) {
382         case 0:  code[0] |= 0x100 >> ss2a; break;
383         case 1:  code[0] |= 0x200 >> ss2a; break;
384         case 16: code[0] |= 0x300 >> ss2a; break;
385         default:
386            ERROR("invalid c[] space for short form\n");
387            break;
388         }
389         if (s == 1)
390            code[0] |= i->getSrc(s)->reg.data.offset << 24;
391         else
392            code[0] |= i->getSrc(s)->reg.data.offset << 6;
393      } else
394      if (i->src(s).getFile() == FILE_IMMEDIATE) {
395         assert(s == 1);
396         setImmediateS8(i->src(s));
397      } else
398      if (i->src(s).getFile() == FILE_GPR) {
399         srcId(i->src(s), (s == 1) ? 26 : 8);
400      }
401   }
402}
403
404void
405CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
406{
407   if (src.getFile() == FILE_MEMORY_CONST) {
408      switch (src.get()->reg.fileIndex) {
409      case 0:  code[0] |= 0x100; break;
410      case 1:  code[0] |= 0x200; break;
411      case 16: code[0] |= 0x300; break;
412      default:
413         assert(!"unsupported file index for short op");
414         break;
415      }
416      srcAddr32(src, 20);
417   } else {
418      srcId(src, 20);
419      assert(src.getFile() == FILE_GPR);
420   }
421}
422
423void
424CodeEmitterNVC0::emitNOP(const Instruction *i)
425{
426   code[0] = 0x000001e4;
427   code[1] = 0x40000000;
428   emitPredicate(i);
429}
430
431void
432CodeEmitterNVC0::emitFMAD(const Instruction *i)
433{
434   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
435
436   if (i->encSize == 8) {
437      if (isLIMM(i->src(1), TYPE_F32)) {
438         emitForm_A(i, HEX64(20000000, 00000002));
439      } else {
440         emitForm_A(i, HEX64(30000000, 00000000));
441
442         if (i->src(2).mod.neg())
443            code[0] |= 1 << 8;
444      }
445      roundMode_A(i);
446
447      if (neg1)
448         code[0] |= 1 << 9;
449
450      if (i->saturate)
451         code[0] |= 1 << 5;
452      if (i->ftz)
453         code[0] |= 1 << 6;
454   } else {
455      assert(!i->saturate && !i->src(2).mod.neg());
456      emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
457                 false);
458      if (neg1)
459         code[0] |= 1 << 4;
460   }
461}
462
463void
464CodeEmitterNVC0::emitFMUL(const Instruction *i)
465{
466   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
467
468   assert(i->postFactor >= -3 && i->postFactor <= 3);
469
470   if (i->encSize == 8) {
471      if (isLIMM(i->src(1), TYPE_F32)) {
472         assert(i->postFactor == 0); // constant folded, hopefully
473         emitForm_A(i, HEX64(30000000, 00000002));
474      } else {
475         emitForm_A(i, HEX64(58000000, 00000000));
476         roundMode_A(i);
477         code[1] |= ((i->postFactor > 0) ?
478                     (7 - i->postFactor) : (0 - i->postFactor)) << 17;
479      }
480      if (neg)
481         code[1] ^= 1 << 25; // aliases with LIMM sign bit
482
483      if (i->saturate)
484         code[0] |= 1 << 5;
485
486      if (i->dnz)
487         code[0] |= 1 << 7;
488      else
489      if (i->ftz)
490         code[0] |= 1 << 6;
491   } else {
492      assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
493      emitForm_S(i, 0xa8, true);
494   }
495}
496
497void
498CodeEmitterNVC0::emitUMUL(const Instruction *i)
499{
500   if (i->encSize == 8) {
501      if (i->src(1).getFile() == FILE_IMMEDIATE) {
502         emitForm_A(i, HEX64(10000000, 00000002));
503      } else {
504         emitForm_A(i, HEX64(50000000, 00000003));
505      }
506      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
507         code[0] |= 1 << 6;
508      if (i->sType == TYPE_S32)
509         code[0] |= 1 << 5;
510      if (i->dType == TYPE_S32)
511         code[0] |= 1 << 7;
512   } else {
513      emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
514
515      if (i->sType == TYPE_S32)
516         code[0] |= 1 << 6;
517   }
518}
519
520void
521CodeEmitterNVC0::emitFADD(const Instruction *i)
522{
523   if (i->encSize == 8) {
524      if (isLIMM(i->src(1), TYPE_F32)) {
525         assert(!i->saturate);
526         emitForm_A(i, HEX64(28000000, 00000002));
527
528         code[0] |= i->src(0).mod.abs() << 7;
529         code[0] |= i->src(0).mod.neg() << 9;
530
531         if (i->src(1).mod.abs())
532            code[1] &= 0xfdffffff;
533         if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
534            code[1] ^= 0x02000000;
535      } else {
536         emitForm_A(i, HEX64(50000000, 00000000));
537
538         roundMode_A(i);
539         if (i->saturate)
540            code[1] |= 1 << 17;
541
542         emitNegAbs12(i);
543         if (i->op == OP_SUB) code[0] ^= 1 << 8;
544      }
545      if (i->ftz)
546         code[0] |= 1 << 5;
547   } else {
548      assert(!i->saturate && i->op != OP_SUB &&
549             !i->src(0).mod.abs() &&
550             !i->src(1).mod.neg() && !i->src(1).mod.abs());
551
552      emitForm_S(i, 0x49, true);
553
554      if (i->src(0).mod.neg())
555         code[0] |= 1 << 7;
556   }
557}
558
559void
560CodeEmitterNVC0::emitUADD(const Instruction *i)
561{
562   uint32_t addOp = 0;
563
564   assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
565   assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
566
567   if (i->src(0).mod.neg())
568      addOp |= 0x200;
569   if (i->src(1).mod.neg())
570      addOp |= 0x100;
571   if (i->op == OP_SUB) {
572      addOp ^= 0x100;
573      assert(addOp != 0x300); // would be add-plus-one
574   }
575
576   if (i->encSize == 8) {
577      if (isLIMM(i->src(1), TYPE_U32)) {
578         emitForm_A(i, HEX64(08000000, 00000002));
579         if (i->defExists(1))
580            code[1] |= 1 << 26; // write carry
581      } else {
582         emitForm_A(i, HEX64(48000000, 00000003));
583         if (i->defExists(1))
584            code[1] |= 1 << 16; // write carry
585      }
586      code[0] |= addOp;
587
588      if (i->saturate)
589         code[0] |= 1 << 5;
590      if (i->flagsSrc >= 0) // add carry
591         code[0] |= 1 << 6;
592   } else {
593      assert(!(addOp & 0x100));
594      emitForm_S(i, (addOp >> 3) |
595                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
596   }
597}
598
599// TODO: shl-add
600void
601CodeEmitterNVC0::emitIMAD(const Instruction *i)
602{
603   assert(i->encSize == 8);
604   emitForm_A(i, HEX64(20000000, 00000003));
605
606   if (isSignedType(i->dType))
607      code[0] |= 1 << 7;
608   if (isSignedType(i->sType))
609      code[0] |= 1 << 5;
610
611   code[1] |= i->saturate << 24;
612
613   if (i->flagsDef >= 0) code[1] |= 1 << 16;
614   if (i->flagsSrc >= 0) code[1] |= 1 << 23;
615
616   if (i->src(2).mod.neg()) code[0] |= 0x10;
617   if (i->src(1).mod.neg() ^
618       i->src(0).mod.neg()) code[0] |= 0x20;
619
620   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
621      code[0] |= 1 << 6;
622}
623
624void
625CodeEmitterNVC0::emitISAD(const Instruction *i)
626{
627   assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
628   assert(i->encSize == 8);
629
630   emitForm_A(i, HEX64(38000000, 00000003));
631
632   if (i->dType == TYPE_S32)
633      code[0] |= 1 << 5;
634}
635
636void
637CodeEmitterNVC0::emitNOT(Instruction *i)
638{
639   assert(i->encSize == 8);
640   i->setSrc(1, i->src(0));
641   emitForm_A(i, HEX64(68000000, 000001c3));
642}
643
644void
645CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
646{
647   if (i->encSize == 8) {
648      if (isLIMM(i->src(1), TYPE_U32)) {
649         emitForm_A(i, HEX64(38000000, 00000002));
650
651         if (i->srcExists(2))
652            code[1] |= 1 << 26;
653      } else {
654         emitForm_A(i, HEX64(68000000, 00000003));
655
656         if (i->srcExists(2))
657            code[1] |= 1 << 16;
658      }
659      code[0] |= subOp << 6;
660
661      if (i->srcExists(2)) // carry
662         code[0] |= 1 << 5;
663
664      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
665      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
666   } else {
667      emitForm_S(i, (subOp << 5) |
668                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
669   }
670}
671
672void
673CodeEmitterNVC0::emitPOPC(const Instruction *i)
674{
675   emitForm_A(i, HEX64(54000000, 00000004));
676
677   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
678   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
679}
680
681void
682CodeEmitterNVC0::emitINSBF(const Instruction *i)
683{
684   emitForm_A(i, HEX64(28000000, 30000000));
685}
686
687void
688CodeEmitterNVC0::emitShift(const Instruction *i)
689{
690   if (i->op == OP_SHR) {
691      emitForm_A(i, HEX64(58000000, 00000003)
692                 | (isSignedType(i->dType) ? 0x20 : 0x00));
693   } else {
694      emitForm_A(i, HEX64(60000000, 00000003));
695   }
696
697   if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
698      code[0] |= 1 << 9;
699}
700
701void
702CodeEmitterNVC0::emitPreOp(const Instruction *i)
703{
704   if (i->encSize == 8) {
705      emitForm_B(i, HEX64(60000000, 00000000));
706
707      if (i->op == OP_PREEX2)
708         code[0] |= 0x20;
709
710      if (i->src(0).mod.abs()) code[0] |= 1 << 6;
711      if (i->src(0).mod.neg()) code[0] |= 1 << 8;
712   } else {
713      emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
714   }
715}
716
717void
718CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
719{
720   if (i->encSize == 8) {
721      code[0] = 0x00000000 | (subOp << 26);
722      code[1] = 0xc8000000;
723
724      emitPredicate(i);
725
726      defId(i->def(0), 14);
727      srcId(i->src(0), 20);
728
729      assert(i->src(0).getFile() == FILE_GPR);
730
731      if (i->saturate) code[0] |= 1 << 5;
732
733      if (i->src(0).mod.abs()) code[0] |= 1 << 7;
734      if (i->src(0).mod.neg()) code[0] |= 1 << 9;
735   } else {
736      emitForm_S(i, 0x80000008 | (subOp << 26), true);
737
738      assert(!i->src(0).mod.neg());
739      if (i->src(0).mod.abs()) code[0] |= 1 << 30;
740   }
741}
742
743void
744CodeEmitterNVC0::emitMINMAX(const Instruction *i)
745{
746   uint64_t op;
747
748   assert(i->encSize == 8);
749
750   op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
751
752   if (i->ftz)
753      op |= 1 << 5;
754   else
755   if (!isFloatType(i->dType))
756      op |= isSignedType(i->dType) ? 0x23 : 0x03;
757
758   emitForm_A(i, op);
759   emitNegAbs12(i);
760}
761
762void
763CodeEmitterNVC0::roundMode_C(const Instruction *i)
764{
765   switch (i->rnd) {
766   case ROUND_M:  code[1] |= 1 << 17; break;
767   case ROUND_P:  code[1] |= 2 << 17; break;
768   case ROUND_Z:  code[1] |= 3 << 17; break;
769   case ROUND_NI: code[0] |= 1 << 7; break;
770   case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
771   case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
772   case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
773   case ROUND_N: break;
774   default:
775      assert(!"invalid round mode");
776      break;
777   }
778}
779
780void
781CodeEmitterNVC0::roundMode_CS(const Instruction *i)
782{
783   switch (i->rnd) {
784   case ROUND_M:
785   case ROUND_MI: code[0] |= 1 << 16; break;
786   case ROUND_P:
787   case ROUND_PI: code[0] |= 2 << 16; break;
788   case ROUND_Z:
789   case ROUND_ZI: code[0] |= 3 << 16; break;
790   default:
791      break;
792   }
793}
794
// Emit a type conversion. Also used for CEIL/FLOOR/TRUNC (expressed through
// the rounding mode) and for SAT/ABS/NEG (expressed through the modifier
// bits). The instruction is not const because its rounding mode may be
// overwritten here before encoding.
void
CodeEmitterNVC0::emitCVT(Instruction *i)
{
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);

   // CEIL/FLOOR/TRUNC become rounding modes; float-to-float conversions use
   // the *I variants of the mode.
   switch (i->op) {
   case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
   case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
   case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   default:
      break;
   }

   // The op itself or a source modifier / the saturate flag can request each
   // of these.
   const bool sat = (i->op == OP_SAT) || i->saturate;
   const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
   const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();

   if (i->encSize == 8) {
      emitForm_B(i, HEX64(10000000, 00000004));

      roundMode_C(i);

      // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
      // log2 of the type size in bytes: destination at bit 20, source at 23
      code[0] |= util_logbase2(typeSizeof(i->dType)) << 20;
      code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;

      if (sat)
         code[0] |= 0x20;
      if (abs)
         code[0] |= 1 << 6;
      // for OP_ABS a neg modifier on the source is not encoded
      if (neg && i->op != OP_ABS)
         code[0] |= 1 << 8;

      if (i->ftz)
         code[1] |= 1 << 23;

      // signedness of destination (bit 7) and source (bit 9)
      if (isSignedIntType(i->dType))
         code[0] |= 0x080;
      if (isSignedIntType(i->sType))
         code[0] |= 0x200;

      // conversion class in code[1]: float<-int, int<-float or int<-int;
      // float<-float adds nothing
      if (isFloatType(i->dType)) {
         if (!isFloatType(i->sType))
            code[1] |= 0x08000000;
      } else {
         if (isFloatType(i->sType))
            code[1] |= 0x04000000;
         else
            code[1] |= 0x0c000000;
      }
   } else {
      // NOTE(review): the short form only assigns the opcode and modifier
      // bits in this function; no def/src ids or predicate are written here
      // -- confirm whether the caller handles that or this path is unused.
      if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
         code[0] = 0x298;
      } else
      if (isFloatType(i->dType)) {
         if (isFloatType(i->sType))
            code[0] = 0x098;
         else
            code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
      } else {
         assert(isFloatType(i->sType));

         // NOTE(review): sType is asserted to be a float type here, yet the
         // signedness test is on sType rather than dType -- confirm intent.
         code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
      }

      if (neg) code[0] |= 1 << 16;
      if (sat) code[0] |= 1 << 18;
      if (abs) code[0] |= 1 << 19;

      roundMode_CS(i);
   }
}
867
// Emit a comparison (SET and its AND/OR/XOR combining variants). The result
// can go to a GPR or to a predicate register; in the predicate case the
// opcode is adjusted and the destination fields are re-encoded.
void
CodeEmitterNVC0::emitSET(const CmpInstruction *i)
{
   uint32_t hi;
   uint32_t lo = 0;

   // low opcode bits depend on the source type: 0x1 for F64, 0x3 for
   // non-float types, 0 for the remaining float types
   if (i->sType == TYPE_F64)
      lo = 0x1;
   else
   if (!isFloatType(i->sType))
      lo = 0x3;

   if (isFloatType(i->dType) || isSignedIntType(i->sType))
      lo |= 0x20;

   // high opcode word selects how the result is combined with source 2
   switch (i->op) {
   case OP_SET_AND: hi = 0x10000000; break;
   case OP_SET_OR:  hi = 0x10200000; break;
   case OP_SET_XOR: hi = 0x10400000; break;
   default:
      hi = 0x100e0000;
      break;
   }
   emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);

   // the combining variants take source 2 at bit 17 of code[1]
   if (i->op != OP_SET)
      srcId(i->src(2), 32 + 17);

   if (i->def(0).getFile() == FILE_PREDICATE) {
      // predicate destination: adjust the opcode in code[1] by type
      if (i->sType == TYPE_F32)
         code[1] += 0x10000000;
      else
         code[1] += 0x08000000;

      // clear bits 14..19 written by emitForm_A's defId, then encode def 0
      // at bit 17 and def 1 (or the value 7 if absent) at bit 14
      code[0] &= ~0xfc000;
      defId(i->def(0), 17);
      if (i->defExists(1))
         defId(i->def(1), 14);
      else
         code[0] |= 0x1c000;
   }

   if (i->ftz)
      code[1] |= 1 << 27;

   emitCondCode(i->setCond, 32 + 23);
   emitNegAbs12(i);
}
916
917void
918CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
919{
920   uint64_t op;
921
922   switch (i->dType) {
923   case TYPE_S32:
924      op = HEX64(30000000, 00000023);
925      break;
926   case TYPE_U32:
927      op = HEX64(30000000, 00000003);
928      break;
929   case TYPE_F32:
930      op = HEX64(38000000, 00000000);
931      break;
932   default:
933      assert(!"invalid type for SLCT");
934      op = 0;
935      break;
936   }
937   emitForm_A(i, op);
938
939   CondCode cc = i->setCond;
940
941   if (i->src(2).mod.neg())
942      cc = reverseCondCode(cc);
943
944   emitCondCode(cc, 32 + 23);
945
946   if (i->ftz)
947      code[0] |= 1 << 5;
948}
949
950void CodeEmitterNVC0::emitSELP(const Instruction *i)
951{
952   emitForm_A(i, HEX64(20000000, 00000004));
953
954   if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
955      code[1] |= 1 << 20;
956}
957
958void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
959{
960   code[0] = 0x00000006 | (i->subOp << 26);
961   code[1] = 0xf0000000;
962   emitPredicate(i);
963   emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
964}
965
966void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
967{
968   code[0] = 0x00000086;
969   code[1] = 0xd0000000;
970
971   code[1] |= i->tex.r;
972   code[1] |= i->tex.s << 8;
973
974   if (i->tex.liveOnly)
975      code[0] |= 1 << 9;
976
977   defId(i->def(0), 14);
978   srcId(i->src(0), 20);
979}
980
981static inline bool
982isNextIndependentTex(const TexInstruction *i)
983{
984   if (!i->next || !isTextureOp(i->next->op))
985      return false;
986   if (i->getDef(0)->interfers(i->next->getSrc(0)))
987      return false;
988   return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
989}
990
// Emit a texture instruction (TEX, TXB, TXL, TXF, TXG or TXD). code[0]
// carries the mode, liveOnly flag, predicate, def 0 and the first sources;
// code[1] carries the opcode, write mask, texture/sampler indices and the
// texture target description.
void
CodeEmitterNVC0::emitTEX(const TexInstruction *i)
{
   code[0] = 0x00000006;

   // mode depends on whether the next instruction is a texture op that does
   // not interfere with this one's result
   if (isNextIndependentTex(i))
      code[0] |= 0x080; // t mode
   else
      code[0] |= 0x100; // p mode

   if (i->tex.liveOnly)
      code[0] |= 1 << 9;

   // NOTE(review): with assertions disabled an unknown op leaves code[1]
   // unassigned before the |= operations below -- confirm callers never
   // pass other ops.
   switch (i->op) {
   case OP_TEX: code[1] = 0x80000000; break;
   case OP_TXB: code[1] = 0x84000000; break;
   case OP_TXL: code[1] = 0x86000000; break;
   case OP_TXF: code[1] = 0x90000000; break;
   case OP_TXG: code[1] = 0xa0000000; break;
   case OP_TXD: code[1] = 0xe0000000; break;
   default:
      assert(!"invalid texture op");
      break;
   }
   // bit 25 has inverted meaning for TXF: set when levelZero is NOT requested
   if (i->op == OP_TXF) {
      if (!i->tex.levelZero)
         code[1] |= 0x02000000;
   } else
   if (i->tex.levelZero) {
      code[1] |= 0x02000000;
   }

   if (i->op != OP_TXD && i->tex.derivAll)
      code[1] |= 1 << 13;

   defId(i->def(0), 14);
   srcId(i->src(0), 20);

   emitPredicate(i);

   if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;

   code[1] |= i->tex.mask << 14;

   code[1] |= i->tex.r;
   code[1] |= i->tex.s << 8;
   if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
      code[1] |= 1 << 18; // in 1st source (with array index)

   // texture target:
   // (dimension - 1) at bit 20; cube targets add 2 to that field
   code[1] |= (i->tex.target.getDim() - 1) << 20;
   if (i->tex.target.isCube())
      code[1] += 2 << 20;
   if (i->tex.target.isArray())
      code[1] |= 1 << 19;
   if (i->tex.target.isShadow())
      code[1] |= 1 << 24;

   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)

   // an immediate second source clears one opcode bit for TXL / TXF
   if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
      // lzero
      if (i->op == OP_TXL)
         code[1] &= ~(1 << 26);
      else
      if (i->op == OP_TXF)
         code[1] &= ~(1 << 25);
   }
   if (i->tex.target == TEX_TARGET_2D_MS ||
       i->tex.target == TEX_TARGET_2D_MS_ARRAY)
      code[1] |= 1 << 23;

   if (i->tex.useOffsets) // in vecSrc0.w
      code[1] |= 1 << 22;

   // second source at bit 26 (id 63 if it does not exist)
   srcId(i, src1, 26);
}
1068
1069void
1070CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1071{
1072   code[0] = 0x00000086;
1073   code[1] = 0xc0000000;
1074
1075   switch (i->tex.query) {
1076   case TXQ_DIMS:            code[1] |= 0 << 22; break;
1077   case TXQ_TYPE:            code[1] |= 1 << 22; break;
1078   case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1079   case TXQ_FILTER:          code[1] |= 3 << 22; break;
1080   case TXQ_LOD:             code[1] |= 4 << 22; break;
1081   case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
1082   default:
1083      assert(!"invalid texture query");
1084      break;
1085   }
1086
1087   code[1] |= i->tex.mask << 14;
1088
1089   code[1] |= i->tex.r;
1090   code[1] |= i->tex.s << 8;
1091   if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1092      code[1] |= 1 << 18;
1093
1094   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1095
1096   defId(i->def(0), 14);
1097   srcId(i->src(0), 20);
1098   srcId(i, src1, 26);
1099
1100   emitPredicate(i);
1101}
1102
1103void
1104CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1105{
1106   code[0] = 0x00000000 | (laneMask << 6);
1107   code[1] = 0x48000000 | qOp;
1108
1109   defId(i->def(0), 14);
1110   srcId(i->src(0), 20);
1111   srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1112
1113   if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1114      code[0] |= 1 << 9; // dall
1115
1116   emitPredicate(i);
1117}
1118
1119void
1120CodeEmitterNVC0::emitFlow(const Instruction *i)
1121{
1122   const FlowInstruction *f = i->asFlow();
1123
1124   unsigned mask; // bit 0: predicate, bit 1: target
1125
1126   code[0] = 0x00000007;
1127
1128   switch (i->op) {
1129   case OP_BRA:
1130      code[1] = f->absolute ? 0x00000000 : 0x40000000;
1131      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1132         code[0] |= 0x4000;
1133      mask = 3;
1134      break;
1135   case OP_CALL:
1136      code[1] = f->absolute ? 0x10000000 : 0x50000000;
1137      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1138         code[0] |= 0x4000;
1139      mask = 2;
1140      break;
1141
1142   case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
1143   case OP_RET:     code[1] = 0x90000000; mask = 1; break;
1144   case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1145   case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
1146   case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
1147
1148   case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
1149   case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1150   case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
1151   case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
1152
1153   case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
1154   case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1155   case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
1156   default:
1157      assert(!"invalid flow operation");
1158      return;
1159   }
1160
1161   if (mask & 1) {
1162      emitPredicate(i);
1163      if (i->flagsSrc < 0)
1164         code[0] |= 0x1e0;
1165   }
1166
1167   if (!f)
1168      return;
1169
1170   if (f->allWarp)
1171      code[0] |= 1 << 15;
1172   if (f->limit)
1173      code[0] |= 1 << 16;
1174
1175   if (f->op == OP_CALL) {
1176      if (f->builtin) {
1177         assert(f->absolute);
1178         uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin);
1179         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1180         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1181      } else {
1182         assert(!f->absolute);
1183         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1184         code[0] |= (pcRel & 0x3f) << 26;
1185         code[1] |= (pcRel >> 6) & 0x3ffff;
1186      }
1187   } else
1188   if (mask & 2) {
1189      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1190      // currently we don't want absolute branches
1191      assert(!f->absolute);
1192      code[0] |= (pcRel & 0x3f) << 26;
1193      code[1] |= (pcRel >> 6) & 0x3ffff;
1194   }
1195}
1196
1197void
1198CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1199{
1200   uint32_t prim = i->src(0).get()->reg.data.u32;
1201
1202   code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1203   code[1] = 0x00000000 | (prim >> 6);
1204
1205   emitPredicate(i);
1206
1207   defId(i->def(0), 14);
1208   srcId(i->src(1), 20);
1209}
1210
1211void
1212CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1213{
1214   code[0] = 0x00000006;
1215   code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1216
1217   if (i->perPatch)
1218      code[0] |= 0x100;
1219   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1220      code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1221
1222   emitPredicate(i);
1223
1224   code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1225
1226   defId(i->def(0), 14);
1227   srcId(i->src(0).getIndirect(0), 20);
1228   srcId(i->src(0).getIndirect(1), 26); // vertex address
1229}
1230
1231void
1232CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1233{
1234   unsigned int size = typeSizeof(i->dType);
1235
1236   code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1237   code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1238
1239   assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1240
1241   if (i->perPatch)
1242      code[0] |= 0x100;
1243
1244   emitPredicate(i);
1245
1246   assert(i->src(1).getFile() == FILE_GPR);
1247
1248   srcId(i->src(0).getIndirect(0), 20);
1249   srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1250   srcId(i->src(1), 26);
1251}
1252
1253void
1254CodeEmitterNVC0::emitOUT(const Instruction *i)
1255{
1256   code[0] = 0x00000006;
1257   code[1] = 0x1c000000;
1258
1259   emitPredicate(i);
1260
1261   defId(i->def(0), 14); // new secret address
1262   srcId(i->src(0), 20); // old secret address, should be 0 initially
1263
1264   assert(i->src(0).getFile() == FILE_GPR);
1265
1266   if (i->op == OP_EMIT)
1267      code[0] |= 1 << 5;
1268   if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1269      code[0] |= 1 << 6;
1270
1271   // vertex stream
1272   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1273      code[1] |= 0xc000;
1274      code[0] |= SDATA(i->src(1)).u32 << 26;
1275   } else {
1276      srcId(i->src(1), 26);
1277   }
1278}
1279
1280void
1281CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1282{
1283   if (i->encSize == 8) {
1284      code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1285   } else {
1286      if (i->getInterpMode() == NV50_IR_INTERP_SC)
1287         code[0] |= 0x80;
1288      assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1289   }
1290}
1291
1292void
1293CodeEmitterNVC0::emitINTERP(const Instruction *i)
1294{
1295   const uint32_t base = i->getSrc(0)->reg.data.offset;
1296
1297   if (i->encSize == 8) {
1298      code[0] = 0x00000000;
1299      code[1] = 0xc0000000 | (base & 0xffff);
1300
1301      if (i->saturate)
1302         code[0] |= 1 << 5;
1303
1304      if (i->op == OP_PINTERP)
1305         srcId(i->src(1), 26);
1306      else
1307         code[0] |= 0x3f << 26;
1308
1309      srcId(i->src(0).getIndirect(0), 20);
1310   } else {
1311      assert(i->op == OP_PINTERP);
1312      code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1313      srcId(i->src(1), 20);
1314   }
1315   emitInterpMode(i);
1316
1317   emitPredicate(i);
1318   defId(i->def(0), 14);
1319
1320   if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1321      srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17);
1322   else
1323      code[1] |= 0x3f << 17;
1324}
1325
1326void
1327CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1328{
1329   uint8_t val;
1330
1331   switch (ty) {
1332   case TYPE_U8:
1333      val = 0x00;
1334      break;
1335   case TYPE_S8:
1336      val = 0x20;
1337      break;
1338   case TYPE_F16:
1339   case TYPE_U16:
1340      val = 0x40;
1341      break;
1342   case TYPE_S16:
1343      val = 0x60;
1344      break;
1345   case TYPE_F32:
1346   case TYPE_U32:
1347   case TYPE_S32:
1348      val = 0x80;
1349      break;
1350   case TYPE_F64:
1351   case TYPE_U64:
1352   case TYPE_S64:
1353      val = 0xa0;
1354      break;
1355   case TYPE_B128:
1356      val = 0xc0;
1357      break;
1358   default:
1359      val = 0x80;
1360      assert(!"invalid type");
1361      break;
1362   }
1363   code[0] |= val;
1364}
1365
1366void
1367CodeEmitterNVC0::emitCachingMode(CacheMode c)
1368{
1369   uint32_t val;
1370
1371   switch (c) {
1372   case CACHE_CA:
1373// case CACHE_WB:
1374      val = 0x000;
1375      break;
1376   case CACHE_CG:
1377      val = 0x100;
1378      break;
1379   case CACHE_CS:
1380      val = 0x200;
1381      break;
1382   case CACHE_CV:
1383// case CACHE_WT:
1384      val = 0x300;
1385      break;
1386   default:
1387      val = 0;
1388      assert(!"invalid caching mode");
1389      break;
1390   }
1391   code[0] |= val;
1392}
1393
1394void
1395CodeEmitterNVC0::emitSTORE(const Instruction *i)
1396{
1397   uint32_t opc;
1398
1399   switch (i->src(0).getFile()) {
1400   case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1401   case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
1402   case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1403   default:
1404      assert(!"invalid memory file");
1405      opc = 0;
1406      break;
1407   }
1408   code[0] = 0x00000005;
1409   code[1] = opc;
1410
1411   setAddress16(i->src(0));
1412   srcId(i->src(1), 14);
1413   srcId(i->src(0).getIndirect(0), 20);
1414
1415   emitPredicate(i);
1416
1417   emitLoadStoreType(i->dType);
1418   emitCachingMode(i->cache);
1419}
1420
1421void
1422CodeEmitterNVC0::emitLOAD(const Instruction *i)
1423{
1424   uint32_t opc;
1425
1426   code[0] = 0x00000005;
1427
1428   switch (i->src(0).getFile()) {
1429   case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1430   case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
1431   case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1432   case FILE_MEMORY_CONST:
1433      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1434         emitMOV(i); // not sure if this is any better
1435         return;
1436      }
1437      opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1438      code[0] = 0x00000006 | (i->subOp << 8);
1439      break;
1440   default:
1441      assert(!"invalid memory file");
1442      opc = 0;
1443      break;
1444   }
1445   code[1] = opc;
1446
1447   defId(i->def(0), 14);
1448
1449   setAddress16(i->src(0));
1450   srcId(i->src(0).getIndirect(0), 20);
1451
1452   emitPredicate(i);
1453
1454   emitLoadStoreType(i->dType);
1455   emitCachingMode(i->cache);
1456}
1457
1458uint8_t
1459CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1460{
1461   switch (SDATA(ref).sv.sv) {
1462   case SV_LANEID:        return 0x00;
1463   case SV_PHYSID:        return 0x03;
1464   case SV_VERTEX_COUNT:  return 0x10;
1465   case SV_INVOCATION_ID: return 0x11;
1466   case SV_YDIR:          return 0x12;
1467   case SV_TID:           return 0x21 + SDATA(ref).sv.index;
1468   case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
1469   case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
1470   case SV_GRIDID:        return 0x2c;
1471   case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
1472   case SV_LBASE:         return 0x34;
1473   case SV_SBASE:         return 0x30;
1474   case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
1475   default:
1476      assert(!"no sreg for system value");
1477      return 0;
1478   }
1479}
1480
1481void
1482CodeEmitterNVC0::emitMOV(const Instruction *i)
1483{
1484   if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1485      uint8_t sr = getSRegEncoding(i->src(0));
1486
1487      if (i->encSize == 8) {
1488         code[0] = 0x00000004 | (sr << 26);
1489         code[1] = 0x2c000000;
1490      } else {
1491         code[0] = 0x40000008 | (sr << 20);
1492      }
1493      defId(i->def(0), 14);
1494
1495      emitPredicate(i);
1496   } else
1497   if (i->encSize == 8) {
1498      uint64_t opc;
1499
1500      if (i->src(0).getFile() == FILE_IMMEDIATE)
1501         opc = HEX64(18000000, 000001e2);
1502      else
1503      if (i->src(0).getFile() == FILE_PREDICATE)
1504         opc = HEX64(080e0000, 1c000004);
1505      else
1506         opc = HEX64(28000000, 00000004);
1507
1508      opc |= i->lanes << 5;
1509
1510      emitForm_B(i, opc);
1511   } else {
1512      uint32_t imm;
1513
1514      if (i->src(0).getFile() == FILE_IMMEDIATE) {
1515         imm = SDATA(i->src(0)).u32;
1516         if (imm & 0xfff00000) {
1517            assert(!(imm & 0x000fffff));
1518            code[0] = 0x00000318 | imm;
1519         } else {
1520            assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1521            code[0] = 0x00000118 | (imm << 20);
1522         }
1523      } else {
1524         code[0] = 0x0028;
1525         emitShortSrc2(i->src(0));
1526      }
1527      defId(i->def(0), 14);
1528
1529      emitPredicate(i);
1530   }
1531}
1532
1533bool
1534CodeEmitterNVC0::emitInstruction(Instruction *insn)
1535{
1536   unsigned int size = insn->encSize;
1537
1538   if (writeIssueDelays && !(codeSize & 0x3f))
1539      size += 8;
1540
1541   if (!insn->encSize) {
1542      ERROR("skipping unencodable instruction: "); insn->print();
1543      return false;
1544   } else
1545   if (codeSize + size > codeSizeLimit) {
1546      ERROR("code emitter output buffer too small\n");
1547      return false;
1548   }
1549
1550   if (writeIssueDelays) {
1551      if (!(codeSize & 0x3f)) {
1552         code[0] = 0x00000007; // cf issue delay "instruction"
1553         code[1] = 0x20000000;
1554         code += 2;
1555         codeSize += 8;
1556      }
1557      const unsigned int id = (codeSize & 0x3f) / 8 - 1;
1558      uint32_t *data = code - (id * 2 + 2);
1559      if (id <= 2) {
1560         data[0] |= insn->sched << (id * 8 + 4);
1561      } else
1562      if (id == 3) {
1563         data[0] |= insn->sched << 28;
1564         data[1] |= insn->sched >> 4;
1565      } else {
1566         data[1] |= insn->sched << ((id - 4) * 8 + 4);
1567      }
1568   }
1569
1570   // assert that instructions with multiple defs don't corrupt registers
1571   for (int d = 0; insn->defExists(d); ++d)
1572      assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
1573
1574   switch (insn->op) {
1575   case OP_MOV:
1576   case OP_RDSV:
1577      emitMOV(insn);
1578      break;
1579   case OP_NOP:
1580      break;
1581   case OP_LOAD:
1582      emitLOAD(insn);
1583      break;
1584   case OP_STORE:
1585      emitSTORE(insn);
1586      break;
1587   case OP_LINTERP:
1588   case OP_PINTERP:
1589      emitINTERP(insn);
1590      break;
1591   case OP_VFETCH:
1592      emitVFETCH(insn);
1593      break;
1594   case OP_EXPORT:
1595      emitEXPORT(insn);
1596      break;
1597   case OP_PFETCH:
1598      emitPFETCH(insn);
1599      break;
1600   case OP_EMIT:
1601   case OP_RESTART:
1602      emitOUT(insn);
1603      break;
1604   case OP_ADD:
1605   case OP_SUB:
1606      if (isFloatType(insn->dType))
1607         emitFADD(insn);
1608      else
1609         emitUADD(insn);
1610      break;
1611   case OP_MUL:
1612      if (isFloatType(insn->dType))
1613         emitFMUL(insn);
1614      else
1615         emitUMUL(insn);
1616      break;
1617   case OP_MAD:
1618   case OP_FMA:
1619      if (isFloatType(insn->dType))
1620         emitFMAD(insn);
1621      else
1622         emitIMAD(insn);
1623      break;
1624   case OP_SAD:
1625      emitISAD(insn);
1626      break;
1627   case OP_NOT:
1628      emitNOT(insn);
1629      break;
1630   case OP_AND:
1631      emitLogicOp(insn, 0);
1632      break;
1633   case OP_OR:
1634      emitLogicOp(insn, 1);
1635      break;
1636   case OP_XOR:
1637      emitLogicOp(insn, 2);
1638      break;
1639   case OP_SHL:
1640   case OP_SHR:
1641      emitShift(insn);
1642      break;
1643   case OP_SET:
1644   case OP_SET_AND:
1645   case OP_SET_OR:
1646   case OP_SET_XOR:
1647      emitSET(insn->asCmp());
1648      break;
1649   case OP_SELP:
1650      emitSELP(insn);
1651      break;
1652   case OP_SLCT:
1653      emitSLCT(insn->asCmp());
1654      break;
1655   case OP_MIN:
1656   case OP_MAX:
1657      emitMINMAX(insn);
1658      break;
1659   case OP_ABS:
1660   case OP_NEG:
1661   case OP_CEIL:
1662   case OP_FLOOR:
1663   case OP_TRUNC:
1664   case OP_CVT:
1665   case OP_SAT:
1666      emitCVT(insn);
1667      break;
1668   case OP_RSQ:
1669      emitSFnOp(insn, 5);
1670      break;
1671   case OP_RCP:
1672      emitSFnOp(insn, 4);
1673      break;
1674   case OP_LG2:
1675      emitSFnOp(insn, 3);
1676      break;
1677   case OP_EX2:
1678      emitSFnOp(insn, 2);
1679      break;
1680   case OP_SIN:
1681      emitSFnOp(insn, 1);
1682      break;
1683   case OP_COS:
1684      emitSFnOp(insn, 0);
1685      break;
1686   case OP_PRESIN:
1687   case OP_PREEX2:
1688      emitPreOp(insn);
1689      break;
1690   case OP_TEX:
1691   case OP_TXB:
1692   case OP_TXL:
1693   case OP_TXD:
1694   case OP_TXF:
1695      emitTEX(insn->asTex());
1696      break;
1697   case OP_TXQ:
1698      emitTXQ(insn->asTex());
1699      break;
1700   case OP_TEXBAR:
1701      emitTEXBAR(insn);
1702      break;
1703   case OP_BRA:
1704   case OP_CALL:
1705   case OP_PRERET:
1706   case OP_RET:
1707   case OP_DISCARD:
1708   case OP_EXIT:
1709   case OP_PRECONT:
1710   case OP_CONT:
1711   case OP_PREBREAK:
1712   case OP_BREAK:
1713   case OP_JOINAT:
1714   case OP_BRKPT:
1715   case OP_QUADON:
1716   case OP_QUADPOP:
1717      emitFlow(insn);
1718      break;
1719   case OP_QUADOP:
1720      emitQUADOP(insn, insn->subOp, insn->lanes);
1721      break;
1722   case OP_DFDX:
1723      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
1724      break;
1725   case OP_DFDY:
1726      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
1727      break;
1728   case OP_POPCNT:
1729      emitPOPC(insn);
1730      break;
1731   case OP_JOIN:
1732      emitNOP(insn);
1733      insn->join = 1;
1734      break;
1735   case OP_PHI:
1736   case OP_UNION:
1737   case OP_CONSTRAINT:
1738      ERROR("operation should have been eliminated");
1739      return false;
1740   case OP_EXP:
1741   case OP_LOG:
1742   case OP_SQRT:
1743   case OP_POW:
1744      ERROR("operation should have been lowered\n");
1745      return false;
1746   default:
1747      ERROR("unknow op\n");
1748      return false;
1749   }
1750
1751   if (insn->join) {
1752      code[0] |= 0x10;
1753      assert(insn->encSize == 8);
1754   }
1755
1756   code += insn->encSize / 4;
1757   codeSize += insn->encSize;
1758   return true;
1759}
1760
1761uint32_t
1762CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
1763{
1764   const Target::OpInfo &info = targ->getOpInfo(i);
1765
1766   if (writeIssueDelays || info.minEncSize == 8 || 1)
1767      return 8;
1768
1769   if (i->ftz || i->saturate || i->join)
1770      return 8;
1771   if (i->rnd != ROUND_N)
1772      return 8;
1773   if (i->predSrc >= 0 && i->op == OP_MAD)
1774      return 8;
1775
1776   if (i->op == OP_PINTERP) {
1777      if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
1778         return 8;
1779   } else
1780   if (i->op == OP_MOV && i->lanes != 0xf) {
1781      return 8;
1782   }
1783
1784   for (int s = 0; i->srcExists(s); ++s) {
1785      if (i->src(s).isIndirect(0))
1786         return 8;
1787
1788      if (i->src(s).getFile() == FILE_MEMORY_CONST) {
1789         if (SDATA(i->src(s)).offset >= 0x100)
1790            return 8;
1791         if (i->getSrc(s)->reg.fileIndex > 1 &&
1792             i->getSrc(s)->reg.fileIndex != 16)
1793             return 8;
1794      } else
1795      if (i->src(s).getFile() == FILE_IMMEDIATE) {
1796         if (i->dType == TYPE_F32) {
1797            if (SDATA(i->src(s)).u32 >= 0x100)
1798               return 8;
1799         } else {
1800            if (SDATA(i->src(s)).u32 > 0xff)
1801               return 8;
1802         }
1803      }
1804
1805      if (i->op == OP_CVT)
1806         continue;
1807      if (i->src(s).mod != Modifier(0)) {
1808         if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
1809            if (i->op != OP_RSQ)
1810               return 8;
1811         if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
1812            if (i->op != OP_ADD || s != 0)
1813               return 8;
1814      }
1815   }
1816
1817   return 4;
1818}
1819
1820// Simplified, erring on safe side.
1821class SchedDataCalculator : public Pass
1822{
1823public:
1824   SchedDataCalculator(const Target *targ) : targ(targ) { }
1825
1826private:
1827   struct RegScores
1828   {
1829      struct Resource {
1830         int st[DATA_FILE_COUNT]; // LD to LD delay 3
1831         int ld[DATA_FILE_COUNT]; // ST to ST delay 3
1832         int tex; // TEX to non-TEX delay 17 (0x11)
1833         int sfu; // SFU to SFU delay 3 (except PRE-ops)
1834         int imul; // integer MUL to MUL delay 3
1835      } res;
1836      struct ScoreData {
1837         int r[64];
1838         int p[8];
1839         int c;
1840      } rd, wr;
1841      int base;
1842
1843      void rebase(const int base)
1844      {
1845         const int delta = this->base - base;
1846         if (!delta)
1847            return;
1848         this->base = 0;
1849
1850         for (int i = 0; i < 64; ++i) {
1851            rd.r[i] += delta;
1852            wr.r[i] += delta;
1853         }
1854         for (int i = 0; i < 8; ++i) {
1855            rd.p[i] += delta;
1856            wr.p[i] += delta;
1857         }
1858         rd.c += delta;
1859         wr.c += delta;
1860
1861         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
1862            res.ld[f] += delta;
1863            res.st[f] += delta;
1864         }
1865         res.sfu += delta;
1866         res.imul += delta;
1867         res.tex += delta;
1868      }
1869      void wipe()
1870      {
1871         memset(&rd, 0, sizeof(rd));
1872         memset(&wr, 0, sizeof(wr));
1873         memset(&res, 0, sizeof(res));
1874      }
1875      int getLatest(const ScoreData& d) const
1876      {
1877         int max = 0;
1878         for (int i = 0; i < 64; ++i)
1879            if (d.r[i] > max)
1880               max = d.r[i];
1881         for (int i = 0; i < 8; ++i)
1882            if (d.p[i] > max)
1883               max = d.p[i];
1884         if (d.c > max)
1885            max = d.c;
1886         return max;
1887      }
1888      inline int getLatestRd() const
1889      {
1890         return getLatest(rd);
1891      }
1892      inline int getLatestWr() const
1893      {
1894         return getLatest(wr);
1895      }
1896      inline int getLatest() const
1897      {
1898         const int a = getLatestRd();
1899         const int b = getLatestWr();
1900
1901         int max = MAX2(a, b);
1902         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
1903            max = MAX2(res.ld[f], max);
1904            max = MAX2(res.st[f], max);
1905         }
1906         max = MAX2(res.sfu, max);
1907         max = MAX2(res.imul, max);
1908         max = MAX2(res.tex, max);
1909         return max;
1910      }
1911      void setMax(const RegScores *that)
1912      {
1913         for (int i = 0; i < 64; ++i) {
1914            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
1915            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
1916         }
1917         for (int i = 0; i < 8; ++i) {
1918            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
1919            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
1920         }
1921         rd.c = MAX2(rd.c, that->rd.c);
1922         wr.c = MAX2(wr.c, that->wr.c);
1923
1924         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
1925            res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
1926            res.st[f] = MAX2(res.st[f], that->res.st[f]);
1927         }
1928         res.sfu = MAX2(res.sfu, that->res.sfu);
1929         res.imul = MAX2(res.imul, that->res.imul);
1930         res.tex = MAX2(res.tex, that->res.tex);
1931      }
1932      void print(int cycle)
1933      {
1934         for (int i = 0; i < 64; ++i) {
1935            if (rd.r[i] > cycle)
1936               INFO("rd $r%i @ %i\n", i, rd.r[i]);
1937            if (wr.r[i] > cycle)
1938               INFO("wr $r%i @ %i\n", i, wr.r[i]);
1939         }
1940         for (int i = 0; i < 8; ++i) {
1941            if (rd.p[i] > cycle)
1942               INFO("rd $p%i @ %i\n", i, rd.p[i]);
1943            if (wr.p[i] > cycle)
1944               INFO("wr $p%i @ %i\n", i, wr.p[i]);
1945         }
1946         if (rd.c > cycle)
1947            INFO("rd $c @ %i\n", rd.c);
1948         if (wr.c > cycle)
1949            INFO("wr $c @ %i\n", wr.c);
1950         if (res.sfu > cycle)
1951            INFO("sfu @ %i\n", res.sfu);
1952         if (res.imul > cycle)
1953            INFO("imul @ %i\n", res.imul);
1954         if (res.tex > cycle)
1955            INFO("tex @ %i\n", res.tex);
1956      }
1957   };
1958
1959   RegScores *score; // for current BB
1960   std::vector<RegScores> scoreBoards;
1961   int cycle;
1962   int prevData;
1963   operation prevOp;
1964
1965   const Target *targ;
1966
1967   bool visit(Function *);
1968   bool visit(BasicBlock *);
1969
1970   void commitInsn(const Instruction *, int cycle);
1971   int calcDelay(const Instruction *, int cycle) const;
1972   void setDelay(Instruction *, int delay, Instruction *next);
1973
1974   void recordRd(const Value *, const int ready);
1975   void recordWr(const Value *, const int ready);
1976   void checkRd(const Value *, int cycle, int& delay) const;
1977   void checkWr(const Value *, int cycle, int& delay) const;
1978
1979   int getCycles(const Instruction *, int origDelay) const;
1980};
1981
1982void
1983SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
1984{
1985   if (insn->op == OP_EXIT)
1986      delay = MAX2(delay, 14);
1987
1988   if (insn->op == OP_TEXBAR) {
1989      // TODO: except if results not used before EXIT
1990      insn->sched = 0xc2;
1991   } else
1992   if (insn->op == OP_JOIN || insn->join) {
1993      insn->sched = 0x00;
1994   } else
1995   if (delay >= 0 || prevData == 0x04 ||
1996       !next || !targ->canDualIssue(insn, next)) {
1997      insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
1998      if (prevOp == OP_EXPORT)
1999         insn->sched |= 0x40;
2000      else
2001         insn->sched |= 0x20;
2002   } else {
2003      insn->sched = 0x04; // dual-issue
2004   }
2005
2006   if (prevData != 0x04 || prevOp != OP_EXPORT)
2007      if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2008         prevOp = insn->op;
2009
2010   prevData = insn->sched;
2011}
2012
2013int
2014SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2015{
2016   if (insn->sched & 0x80) {
2017      int c = (insn->sched & 0x0f) * 2 + 1;
2018      if (insn->op == OP_TEXBAR && origDelay > 0)
2019         c += origDelay;
2020      return c;
2021   }
2022   if (insn->sched & 0x60)
2023      return (insn->sched & 0x1f) + 1;
2024   return (insn->sched == 0x04) ? 0 : 32;
2025}
2026
2027bool
2028SchedDataCalculator::visit(Function *func)
2029{
2030   scoreBoards.resize(func->cfg.getSize());
2031   for (size_t i = 0; i < scoreBoards.size(); ++i)
2032      scoreBoards[i].wipe();
2033   return true;
2034}
2035
2036bool
2037SchedDataCalculator::visit(BasicBlock *bb)
2038{
2039   Instruction *insn;
2040   Instruction *next = NULL;
2041
2042   int cycle = 0;
2043
2044   prevData = 0x00;
2045   prevOp = OP_NOP;
2046   score = &scoreBoards.at(bb->getId());
2047
2048   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2049      BasicBlock *in = BasicBlock::get(ei.getNode());
2050      if (in->getExit()) {
2051         if (prevData != 0x04)
2052            prevData = in->getExit()->sched;
2053         prevOp = in->getExit()->op;
2054      }
2055      if (ei.getType() != Graph::Edge::BACK)
2056         score->setMax(&scoreBoards.at(in->getId()));
2057      // back branches will wait until all target dependencies are satisfied
2058   }
2059   if (bb->cfg.incidentCount() > 1)
2060      prevOp = OP_NOP;
2061
2062#ifdef NVC0_DEBUG_SCHED_DATA
2063   INFO("=== BB:%i initial scores\n", bb->getId());
2064   score->print(cycle);
2065#endif
2066
2067   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2068      next = insn->next;
2069
2070      commitInsn(insn, cycle);
2071      int delay = calcDelay(next, cycle);
2072      setDelay(insn, delay, next);
2073      cycle += getCycles(insn, delay);
2074
2075#ifdef NVC0_DEBUG_SCHED_DATA
2076      INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2077      insn->print();
2078      next->print();
2079#endif
2080   }
2081   if (!insn)
2082      return true;
2083   commitInsn(insn, cycle);
2084
2085   int bbDelay = -1;
2086
2087   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2088      BasicBlock *out = BasicBlock::get(ei.getNode());
2089
2090      if (ei.getType() != Graph::Edge::BACK) {
2091         // only test the first instruction of the outgoing block
2092         next = out->getEntry();
2093         if (next)
2094            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2095      } else {
2096         // wait until all dependencies are satisfied
2097         const int regsFree = score->getLatest();
2098         next = out->getFirst();
2099         for (int c = cycle; next && c < regsFree; next = next->next) {
2100            bbDelay = MAX2(bbDelay, calcDelay(next, c));
2101            c += getCycles(next, bbDelay);
2102         }
2103         next = NULL;
2104      }
2105   }
2106   if (bb->cfg.outgoingCount() != 1)
2107      next = NULL;
2108   setDelay(insn, bbDelay, next);
2109   cycle += getCycles(insn, bbDelay);
2110
2111   score->rebase(cycle); // common base for initializing out blocks' scores
2112   return true;
2113}
2114
2115#define NVE4_MAX_ISSUE_DELAY 0x1f
2116int
2117SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2118{
2119   int delay = 0, ready = cycle;
2120
2121   for (int s = 0; insn->srcExists(s); ++s)
2122      checkRd(insn->getSrc(s), cycle, delay);
2123   // WAR & WAW don't seem to matter
2124   // for (int s = 0; insn->srcExists(s); ++s)
2125   //   recordRd(insn->getSrc(s), cycle);
2126
2127   switch (Target::getOpClass(insn->op)) {
2128   case OPCLASS_SFU:
2129      ready = score->res.sfu;
2130      break;
2131   case OPCLASS_ARITH:
2132      if (insn->op == OP_MUL && !isFloatType(insn->dType))
2133         ready = score->res.imul;
2134      break;
2135   case OPCLASS_TEXTURE:
2136      ready = score->res.tex;
2137      break;
2138   case OPCLASS_LOAD:
2139      ready = score->res.ld[insn->src(0).getFile()];
2140      break;
2141   case OPCLASS_STORE:
2142      ready = score->res.st[insn->src(0).getFile()];
2143      break;
2144   default:
2145      break;
2146   }
2147   if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2148      ready = MAX2(ready, score->res.tex);
2149
2150   delay = MAX2(delay, ready - cycle);
2151
2152   // if can issue next cycle, delay is 0, not 1
2153   return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2154}
2155
2156void
2157SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2158{
2159   const int ready = cycle + targ->getLatency(insn);
2160
2161   for (int d = 0; insn->defExists(d); ++d)
2162      recordWr(insn->getDef(d), ready);
2163   // WAR & WAW don't seem to matter
2164   // for (int s = 0; insn->srcExists(s); ++s)
2165   //   recordRd(insn->getSrc(s), cycle);
2166
2167   switch (Target::getOpClass(insn->op)) {
2168   case OPCLASS_SFU:
2169      score->res.sfu = cycle + 4;
2170      break;
2171   case OPCLASS_ARITH:
2172      if (insn->op == OP_MUL && !isFloatType(insn->dType))
2173         score->res.imul = cycle + 4;
2174      break;
2175   case OPCLASS_TEXTURE:
2176      score->res.tex = cycle + 18;
2177      break;
2178   case OPCLASS_LOAD:
2179      if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2180         break;
2181      score->res.ld[insn->src(0).getFile()] = cycle + 4;
2182      score->res.st[insn->src(0).getFile()] = ready;
2183      break;
2184   case OPCLASS_STORE:
2185      score->res.st[insn->src(0).getFile()] = cycle + 4;
2186      score->res.ld[insn->src(0).getFile()] = ready;
2187      break;
2188   case OPCLASS_OTHER:
2189      if (insn->op == OP_TEXBAR)
2190         score->res.tex = cycle;
2191      break;
2192   default:
2193      break;
2194   }
2195
2196#ifdef NVC0_DEBUG_SCHED_DATA
2197   score->print(cycle);
2198#endif
2199}
2200
2201void
2202SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2203{
2204   int ready = cycle;
2205   int a, b;
2206
2207   switch (v->reg.file) {
2208   case FILE_GPR:
2209      a = v->reg.data.id;
2210      b = a + v->reg.size / 4;
2211      for (int r = a; r < b; ++r)
2212         ready = MAX2(ready, score->rd.r[r]);
2213      break;
2214   case FILE_PREDICATE:
2215      ready = MAX2(ready, score->rd.p[v->reg.data.id]);
2216      break;
2217   case FILE_FLAGS:
2218      ready = MAX2(ready, score->rd.c);
2219      break;
2220   case FILE_SHADER_INPUT:
2221   case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
2222   case FILE_MEMORY_LOCAL:
2223   case FILE_MEMORY_CONST:
2224   case FILE_MEMORY_SHARED:
2225   case FILE_MEMORY_GLOBAL:
2226   case FILE_SYSTEM_VALUE:
2227      // TODO: any restrictions here ?
2228      break;
2229   case FILE_IMMEDIATE:
2230      break;
2231   default:
2232      assert(0);
2233      break;
2234   }
2235   if (cycle < ready)
2236      delay = MAX2(delay, ready - cycle);
2237}
2238
2239void
2240SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
2241{
2242   int ready = cycle;
2243   int a, b;
2244
2245   switch (v->reg.file) {
2246   case FILE_GPR:
2247      a = v->reg.data.id;
2248      b = a + v->reg.size / 4;
2249      for (int r = a; r < b; ++r)
2250         ready = MAX2(ready, score->wr.r[r]);
2251      break;
2252   case FILE_PREDICATE:
2253      ready = MAX2(ready, score->wr.p[v->reg.data.id]);
2254      break;
2255   default:
2256      assert(v->reg.file == FILE_FLAGS);
2257      ready = MAX2(ready, score->wr.c);
2258      break;
2259   }
2260   if (cycle < ready)
2261      delay = MAX2(delay, ready - cycle);
2262}
2263
2264void
2265SchedDataCalculator::recordWr(const Value *v, const int ready)
2266{
2267   int a = v->reg.data.id;
2268
2269   if (v->reg.file == FILE_GPR) {
2270      int b = a + v->reg.size / 4;
2271      for (int r = a; r < b; ++r)
2272         score->rd.r[r] = ready;
2273   } else
2274   // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
2275   if (v->reg.file == FILE_PREDICATE) {
2276      score->rd.p[a] = ready + 4;
2277   } else {
2278      assert(v->reg.file == FILE_FLAGS);
2279      score->rd.c = ready + 4;
2280   }
2281}
2282
2283void
2284SchedDataCalculator::recordRd(const Value *v, const int ready)
2285{
2286   int a = v->reg.data.id;
2287
2288   if (v->reg.file == FILE_GPR) {
2289      int b = a + v->reg.size / 4;
2290      for (int r = a; r < b; ++r)
2291         score->wr.r[r] = ready;
2292   } else
2293   if (v->reg.file == FILE_PREDICATE) {
2294      score->wr.p[a] = ready;
2295   } else
2296   if (v->reg.file == FILE_FLAGS) {
2297      score->wr.c = ready;
2298   }
2299}
2300
2301void
2302CodeEmitterNVC0::prepareEmission(Function *func)
2303{
2304   const Target *targ = func->getProgram()->getTarget();
2305
2306   CodeEmitter::prepareEmission(func);
2307
2308   if (targ->hasSWSched) {
2309      SchedDataCalculator sched(targ);
2310      sched.run(func, true, true);
2311   }
2312}
2313
2314CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
2315   : CodeEmitter(target),
2316     writeIssueDelays(target->hasSWSched)
2317{
2318   code = NULL;
2319   codeSize = codeSizeLimit = 0;
2320   relocInfo = NULL;
2321}
2322
2323CodeEmitter *
2324TargetNVC0::getCodeEmitter(Program::Type type)
2325{
2326   CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
2327   emit->setProgramType(type);
2328   return emit;
2329}
2330
2331} // namespace nv50_ir
2332