1/*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25#include "codegen/nv50_ir_target_gm107.h"
26
27//#define GM107_DEBUG_SCHED_DATA
28
29namespace nv50_ir {
30
31class CodeEmitterGM107 : public CodeEmitter
32{
33public:
34   CodeEmitterGM107(const TargetGM107 *);
35
36   virtual bool emitInstruction(Instruction *);
37   virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39   virtual void prepareEmission(Program *);
40   virtual void prepareEmission(Function *);
41
42   inline void setProgramType(Program::Type pType) { progType = pType; }
43
44private:
45   const TargetGM107 *targGM107;
46
47   Program::Type progType;
48
49   const Instruction *insn;
50   const bool writeIssueDelays;
51   uint32_t *data;
52
53private:
54   inline void emitField(uint32_t *, int, int, uint32_t);
55   inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57   inline void emitInsn(uint32_t, bool);
58   inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59   inline void emitPred();
60   inline void emitGPR(int, const Value *);
61   inline void emitGPR(int pos) {
62      emitGPR(pos, (const Value *)NULL);
63   }
64   inline void emitGPR(int pos, const ValueRef &ref) {
65      emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66   }
67   inline void emitGPR(int pos, const ValueRef *ref) {
68      emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69   }
70   inline void emitGPR(int pos, const ValueDef &def) {
71      emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72   }
73   inline void emitSYS(int, const Value *);
74   inline void emitSYS(int pos, const ValueRef &ref) {
75      emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76   }
77   inline void emitPRED(int, const Value *);
78   inline void emitPRED(int pos) {
79      emitPRED(pos, (const Value *)NULL);
80   }
81   inline void emitPRED(int pos, const ValueRef &ref) {
82      emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83   }
84   inline void emitPRED(int pos, const ValueDef &def) {
85      emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86   }
87   inline void emitADDR(int, int, int, int, const ValueRef &);
88   inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89   inline bool longIMMD(const ValueRef &);
90   inline void emitIMMD(int, int, const ValueRef &);
91
92   void emitCond3(int, CondCode);
93   void emitCond4(int, CondCode);
94   void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95   inline void emitO(int);
96   inline void emitP(int);
97   inline void emitSAT(int);
98   inline void emitCC(int);
99   inline void emitX(int);
100   inline void emitABS(int, const ValueRef &);
101   inline void emitNEG(int, const ValueRef &);
102   inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103   inline void emitFMZ(int, int);
104   inline void emitRND(int, RoundMode, int);
105   inline void emitRND(int pos) {
106      emitRND(pos, insn->rnd, -1);
107   }
108   inline void emitPDIV(int);
109   inline void emitINV(int, const ValueRef &);
110
111   void emitEXIT();
112   void emitBRA();
113   void emitCAL();
114   void emitPCNT();
115   void emitCONT();
116   void emitPBK();
117   void emitBRK();
118   void emitPRET();
119   void emitRET();
120   void emitSSY();
121   void emitSYNC();
122   void emitSAM();
123   void emitRAM();
124
125   void emitMOV();
126   void emitS2R();
127   void emitF2F();
128   void emitF2I();
129   void emitI2F();
130   void emitI2I();
131   void emitSEL();
132   void emitSHFL();
133
134   void emitDADD();
135   void emitDMUL();
136   void emitDFMA();
137   void emitDMNMX();
138   void emitDSET();
139   void emitDSETP();
140
141   void emitFADD();
142   void emitFMUL();
143   void emitFFMA();
144   void emitMUFU();
145   void emitFMNMX();
146   void emitRRO();
147   void emitFCMP();
148   void emitFSET();
149   void emitFSETP();
150   void emitFSWZADD();
151
152   void emitLOP();
153   void emitNOT();
154   void emitIADD();
155   void emitIMUL();
156   void emitIMAD();
157   void emitISCADD();
158   void emitIMNMX();
159   void emitICMP();
160   void emitISET();
161   void emitISETP();
162   void emitSHL();
163   void emitSHR();
164   void emitPOPC();
165   void emitBFI();
166   void emitBFE();
167   void emitFLO();
168
169   void emitLDSTs(int, DataType);
170   void emitLDSTc(int);
171   void emitLDC();
172   void emitLDL();
173   void emitLDS();
174   void emitLD();
175   void emitSTL();
176   void emitSTS();
177   void emitST();
178   void emitALD();
179   void emitAST();
180   void emitISBERD();
181   void emitAL2P();
182   void emitIPA();
183   void emitATOM();
184   void emitATOMS();
185   void emitRED();
186   void emitCCTL();
187
188   void emitPIXLD();
189
190   void emitTEXs(int);
191   void emitTEX();
192   void emitTLD();
193   void emitTLD4();
194   void emitTXD();
195   void emitTXQ();
196   void emitTMML();
197   void emitDEPBAR();
198
199   void emitNOP();
200   void emitKIL();
201   void emitOUT();
202
203   void emitBAR();
204   void emitMEMBAR();
205
206   void emitVOTE();
207
208   void emitSUTarget();
209   void emitSUHandle(const int s);
210   void emitSUSTx();
211   void emitSULDx();
212   void emitSUREDx();
213};
214
215/*******************************************************************************
216 * general instruction layout/fields
217 ******************************************************************************/
218
219void
220CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
221{
222   if (b >= 0) {
223      uint32_t m = ((1ULL << s) - 1);
224      uint64_t d = (uint64_t)(v & m) << b;
225      assert(!(v & ~m) || (v & ~m) == ~m);
226      data[1] |= d >> 32;
227      data[0] |= d;
228   }
229}
230
231void
232CodeEmitterGM107::emitPred()
233{
234   if (insn->predSrc >= 0) {
235      emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
236      emitField(19, 1, insn->cc == CC_NOT_P);
237   } else {
238      emitField(16, 3, 7);
239   }
240}
241
242void
243CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
244{
245   code[0] = 0x00000000;
246   code[1] = hi;
247   if (pred)
248      emitPred();
249}
250
251void
252CodeEmitterGM107::emitGPR(int pos, const Value *val)
253{
254   emitField(pos, 8, val ? val->reg.data.id : 255);
255}
256
257void
258CodeEmitterGM107::emitSYS(int pos, const Value *val)
259{
260   int id = val ? val->reg.data.id : -1;
261
262   switch (id) {
263   case SV_LANEID         : id = 0x00; break;
264   case SV_VERTEX_COUNT   : id = 0x10; break;
265   case SV_INVOCATION_ID  : id = 0x11; break;
266   case SV_THREAD_KILL    : id = 0x13; break;
267   case SV_INVOCATION_INFO: id = 0x1d; break;
268   case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
269   case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
270   default:
271      assert(!"invalid system value");
272      id = 0;
273      break;
274   }
275
276   emitField(pos, 8, id);
277}
278
279void
280CodeEmitterGM107::emitPRED(int pos, const Value *val)
281{
282   emitField(pos, 3, val ? val->reg.data.id : 7);
283}
284
285void
286CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
287                           const ValueRef &ref)
288{
289   const Value *v = ref.get();
290   assert(!(v->reg.data.offset & ((1 << shr) - 1)));
291   if (gpr >= 0)
292      emitGPR(gpr, ref.getIndirect(0));
293   emitField(off, len, v->reg.data.offset >> shr);
294}
295
296void
297CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
298                           const ValueRef &ref)
299{
300   const Value *v = ref.get();
301   const Symbol *s = v->asSym();
302
303   assert(!(s->reg.data.offset & ((1 << shr) - 1)));
304
305   emitField(buf,  5, v->reg.fileIndex);
306   if (gpr >= 0)
307      emitGPR(gpr, ref.getIndirect(0));
308   emitField(off, 16, s->reg.data.offset >> shr);
309}
310
311bool
312CodeEmitterGM107::longIMMD(const ValueRef &ref)
313{
314   if (ref.getFile() == FILE_IMMEDIATE) {
315      const ImmediateValue *imm = ref.get()->asImm();
316      if (isFloatType(insn->sType)) {
317         if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
318            return true;
319      } else {
320         if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
321             (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
322            return true;
323      }
324   }
325   return false;
326}
327
328void
329CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
330{
331   const ImmediateValue *imm = ref.get()->asImm();
332   uint32_t val = imm->reg.data.u32;
333
334   if (len == 19) {
335      if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
336         assert(!(val & 0x00000fff));
337         val >>= 12;
338      } else if (insn->sType == TYPE_F64) {
339         assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
340         val = imm->reg.data.u64 >> 44;
341      }
342      assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
343      emitField( 56,   1, (val & 0x80000) >> 19);
344      emitField(pos, len, (val & 0x7ffff));
345   } else {
346      emitField(pos, len, val);
347   }
348}
349
350/*******************************************************************************
351 * modifiers
352 ******************************************************************************/
353
354void
355CodeEmitterGM107::emitCond3(int pos, CondCode code)
356{
357   int data = 0;
358
359   switch (code) {
360   case CC_FL : data = 0x00; break;
361   case CC_LTU:
362   case CC_LT : data = 0x01; break;
363   case CC_EQU:
364   case CC_EQ : data = 0x02; break;
365   case CC_LEU:
366   case CC_LE : data = 0x03; break;
367   case CC_GTU:
368   case CC_GT : data = 0x04; break;
369   case CC_NEU:
370   case CC_NE : data = 0x05; break;
371   case CC_GEU:
372   case CC_GE : data = 0x06; break;
373   case CC_TR : data = 0x07; break;
374   default:
375      assert(!"invalid cond3");
376      break;
377   }
378
379   emitField(pos, 3, data);
380}
381
382void
383CodeEmitterGM107::emitCond4(int pos, CondCode code)
384{
385   int data = 0;
386
387   switch (code) {
388   case CC_FL: data = 0x00; break;
389   case CC_LT: data = 0x01; break;
390   case CC_EQ: data = 0x02; break;
391   case CC_LE: data = 0x03; break;
392   case CC_GT: data = 0x04; break;
393   case CC_NE: data = 0x05; break;
394   case CC_GE: data = 0x06; break;
395//   case CC_NUM: data = 0x07; break;
396//   case CC_NAN: data = 0x08; break;
397   case CC_LTU: data = 0x09; break;
398   case CC_EQU: data = 0x0a; break;
399   case CC_LEU: data = 0x0b; break;
400   case CC_GTU: data = 0x0c; break;
401   case CC_NEU: data = 0x0d; break;
402   case CC_GEU: data = 0x0e; break;
403   case CC_TR:  data = 0x0f; break;
404   default:
405      assert(!"invalid cond4");
406      break;
407   }
408
409   emitField(pos, 4, data);
410}
411
412void
413CodeEmitterGM107::emitO(int pos)
414{
415   emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
416}
417
418void
419CodeEmitterGM107::emitP(int pos)
420{
421   emitField(pos, 1, insn->perPatch);
422}
423
424void
425CodeEmitterGM107::emitSAT(int pos)
426{
427   emitField(pos, 1, insn->saturate);
428}
429
430void
431CodeEmitterGM107::emitCC(int pos)
432{
433   emitField(pos, 1, insn->flagsDef >= 0);
434}
435
436void
437CodeEmitterGM107::emitX(int pos)
438{
439   emitField(pos, 1, insn->flagsSrc >= 0);
440}
441
442void
443CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
444{
445   emitField(pos, 1, ref.mod.abs());
446}
447
448void
449CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
450{
451   emitField(pos, 1, ref.mod.neg());
452}
453
454void
455CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
456{
457   emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
458}
459
460void
461CodeEmitterGM107::emitFMZ(int pos, int len)
462{
463   emitField(pos, len, insn->dnz << 1 | insn->ftz);
464}
465
466void
467CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
468{
469   int rm = 0, ri = 0;
470   switch (rnd) {
471   case ROUND_NI: ri = 1;
472   case ROUND_N : rm = 0; break;
473   case ROUND_MI: ri = 1;
474   case ROUND_M : rm = 1; break;
475   case ROUND_PI: ri = 1;
476   case ROUND_P : rm = 2; break;
477   case ROUND_ZI: ri = 1;
478   case ROUND_Z : rm = 3; break;
479   default:
480      assert(!"invalid round mode");
481      break;
482   }
483   emitField(rip, 1, ri);
484   emitField(rmp, 2, rm);
485}
486
487void
488CodeEmitterGM107::emitPDIV(int pos)
489{
490   assert(insn->postFactor >= -3 && insn->postFactor <= 3);
491   if (insn->postFactor > 0)
492      emitField(pos, 3, 7 - insn->postFactor);
493   else
494      emitField(pos, 3, 0 - insn->postFactor);
495}
496
497void
498CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
499{
500   emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
501}
502
503/*******************************************************************************
504 * control flow
505 ******************************************************************************/
506
507void
508CodeEmitterGM107::emitEXIT()
509{
510   emitInsn (0xe3000000);
511   emitCond5(0x00, CC_TR);
512}
513
514void
515CodeEmitterGM107::emitBRA()
516{
517   const FlowInstruction *insn = this->insn->asFlow();
518   int gpr = -1;
519
520   if (insn->indirect) {
521      if (insn->absolute)
522         emitInsn(0xe2000000); // JMX
523      else
524         emitInsn(0xe2500000); // BRX
525      gpr = 0x08;
526   } else {
527      if (insn->absolute)
528         emitInsn(0xe2100000); // JMP
529      else
530         emitInsn(0xe2400000); // BRA
531      emitField(0x07, 1, insn->allWarp);
532   }
533
534   emitField(0x06, 1, insn->limit);
535   emitCond5(0x00, CC_TR);
536
537   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
538      int32_t pos = insn->target.bb->binPos;
539      if (writeIssueDelays && !(pos & 0x1f))
540         pos += 8;
541      if (!insn->absolute)
542         emitField(0x14, 24, pos - (codeSize + 8));
543      else
544         emitField(0x14, 32, pos);
545   } else {
546      emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
547      emitField(0x05, 1, 1);
548   }
549}
550
551void
552CodeEmitterGM107::emitCAL()
553{
554   const FlowInstruction *insn = this->insn->asFlow();
555
556   if (insn->absolute) {
557      emitInsn(0xe2200000, 0); // JCAL
558   } else {
559      emitInsn(0xe2600000, 0); // CAL
560   }
561
562   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
563      if (!insn->absolute)
564         emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
565      else {
566         if (insn->builtin) {
567            int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
568            addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000,  20);
569            addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
570         } else {
571            emitField(0x14, 32, insn->target.bb->binPos);
572         }
573      }
574   } else {
575      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
576      emitField(0x05, 1, 1);
577   }
578}
579
580void
581CodeEmitterGM107::emitPCNT()
582{
583   const FlowInstruction *insn = this->insn->asFlow();
584
585   emitInsn(0xe2b00000, 0);
586
587   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
588      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
589   } else {
590      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
591      emitField(0x05, 1, 1);
592   }
593}
594
595void
596CodeEmitterGM107::emitCONT()
597{
598   emitInsn (0xe3500000);
599   emitCond5(0x00, CC_TR);
600}
601
602void
603CodeEmitterGM107::emitPBK()
604{
605   const FlowInstruction *insn = this->insn->asFlow();
606
607   emitInsn(0xe2a00000, 0);
608
609   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
610      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
611   } else {
612      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
613      emitField(0x05, 1, 1);
614   }
615}
616
617void
618CodeEmitterGM107::emitBRK()
619{
620   emitInsn (0xe3400000);
621   emitCond5(0x00, CC_TR);
622}
623
624void
625CodeEmitterGM107::emitPRET()
626{
627   const FlowInstruction *insn = this->insn->asFlow();
628
629   emitInsn(0xe2700000, 0);
630
631   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
632      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
633   } else {
634      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
635      emitField(0x05, 1, 1);
636   }
637}
638
639void
640CodeEmitterGM107::emitRET()
641{
642   emitInsn (0xe3200000);
643   emitCond5(0x00, CC_TR);
644}
645
646void
647CodeEmitterGM107::emitSSY()
648{
649   const FlowInstruction *insn = this->insn->asFlow();
650
651   emitInsn(0xe2900000, 0);
652
653   if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
654      emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
655   } else {
656      emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
657      emitField(0x05, 1, 1);
658   }
659}
660
661void
662CodeEmitterGM107::emitSYNC()
663{
664   emitInsn (0xf0f80000);
665   emitCond5(0x00, CC_TR);
666}
667
668void
669CodeEmitterGM107::emitSAM()
670{
671   emitInsn(0xe3700000, 0);
672}
673
674void
675CodeEmitterGM107::emitRAM()
676{
677   emitInsn(0xe3800000, 0);
678}
679
680/*******************************************************************************
681 * predicate/cc
682 ******************************************************************************/
683
684/*******************************************************************************
685 * movement / conversion
686 ******************************************************************************/
687
688void
689CodeEmitterGM107::emitMOV()
690{
691   if (insn->src(0).getFile() != FILE_IMMEDIATE) {
692      switch (insn->src(0).getFile()) {
693      case FILE_GPR:
694         if (insn->def(0).getFile() == FILE_PREDICATE) {
695            emitInsn(0x5b6a0000);
696            emitGPR (0x08);
697         } else {
698            emitInsn(0x5c980000);
699         }
700         emitGPR (0x14, insn->src(0));
701         break;
702      case FILE_MEMORY_CONST:
703         emitInsn(0x4c980000);
704         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
705         break;
706      case FILE_IMMEDIATE:
707         emitInsn(0x38980000);
708         emitIMMD(0x14, 19, insn->src(0));
709         break;
710      case FILE_PREDICATE:
711         emitInsn(0x50880000);
712         emitPRED(0x0c, insn->src(0));
713         emitPRED(0x1d);
714         emitPRED(0x27);
715         break;
716      default:
717         assert(!"bad src file");
718         break;
719      }
720      if (insn->def(0).getFile() != FILE_PREDICATE &&
721          insn->src(0).getFile() != FILE_PREDICATE)
722         emitField(0x27, 4, insn->lanes);
723   } else {
724      emitInsn (0x01000000);
725      emitIMMD (0x14, 32, insn->src(0));
726      emitField(0x0c, 4, insn->lanes);
727   }
728
729   if (insn->def(0).getFile() == FILE_PREDICATE) {
730      emitPRED(0x27);
731      emitPRED(0x03, insn->def(0));
732      emitPRED(0x00);
733   } else {
734      emitGPR(0x00, insn->def(0));
735   }
736}
737
738void
739CodeEmitterGM107::emitS2R()
740{
741   emitInsn(0xf0c80000);
742   emitSYS (0x14, insn->src(0));
743   emitGPR (0x00, insn->def(0));
744}
745
746void
747CodeEmitterGM107::emitF2F()
748{
749   RoundMode rnd = insn->rnd;
750
751   switch (insn->op) {
752   case OP_FLOOR: rnd = ROUND_MI; break;
753   case OP_CEIL : rnd = ROUND_PI; break;
754   case OP_TRUNC: rnd = ROUND_ZI; break;
755   default:
756      break;
757   }
758
759   switch (insn->src(0).getFile()) {
760   case FILE_GPR:
761      emitInsn(0x5ca80000);
762      emitGPR (0x14, insn->src(0));
763      break;
764   case FILE_MEMORY_CONST:
765      emitInsn(0x4ca80000);
766      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
767      break;
768   case FILE_IMMEDIATE:
769      emitInsn(0x38a80000);
770      emitIMMD(0x14, 19, insn->src(0));
771      break;
772   default:
773      assert(!"bad src0 file");
774      break;
775   }
776
777   emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
778   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
779   emitCC   (0x2f);
780   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
781   emitFMZ  (0x2c, 1);
782   emitField(0x29, 1, insn->subOp);
783   emitRND  (0x27, rnd, 0x2a);
784   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
785   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
786   emitGPR  (0x00, insn->def(0));
787}
788
789void
790CodeEmitterGM107::emitF2I()
791{
792   RoundMode rnd = insn->rnd;
793
794   switch (insn->op) {
795   case OP_FLOOR: rnd = ROUND_M; break;
796   case OP_CEIL : rnd = ROUND_P; break;
797   case OP_TRUNC: rnd = ROUND_Z; break;
798   default:
799      break;
800   }
801
802   switch (insn->src(0).getFile()) {
803   case FILE_GPR:
804      emitInsn(0x5cb00000);
805      emitGPR (0x14, insn->src(0));
806      break;
807   case FILE_MEMORY_CONST:
808      emitInsn(0x4cb00000);
809      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
810      break;
811   case FILE_IMMEDIATE:
812      emitInsn(0x38b00000);
813      emitIMMD(0x14, 19, insn->src(0));
814      break;
815   default:
816      assert(!"bad src0 file");
817      break;
818   }
819
820   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
821   emitCC   (0x2f);
822   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
823   emitFMZ  (0x2c, 1);
824   emitRND  (0x27, rnd, 0x2a);
825   emitField(0x0c, 1, isSignedType(insn->dType));
826   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
827   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
828   emitGPR  (0x00, insn->def(0));
829}
830
831void
832CodeEmitterGM107::emitI2F()
833{
834   RoundMode rnd = insn->rnd;
835
836   switch (insn->op) {
837   case OP_FLOOR: rnd = ROUND_M; break;
838   case OP_CEIL : rnd = ROUND_P; break;
839   case OP_TRUNC: rnd = ROUND_Z; break;
840   default:
841      break;
842   }
843
844   switch (insn->src(0).getFile()) {
845   case FILE_GPR:
846      emitInsn(0x5cb80000);
847      emitGPR (0x14, insn->src(0));
848      break;
849   case FILE_MEMORY_CONST:
850      emitInsn(0x4cb80000);
851      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
852      break;
853   case FILE_IMMEDIATE:
854      emitInsn(0x38b80000);
855      emitIMMD(0x14, 19, insn->src(0));
856      break;
857   default:
858      assert(!"bad src0 file");
859      break;
860   }
861
862   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
863   emitCC   (0x2f);
864   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
865   emitField(0x29, 2, insn->subOp);
866   emitRND  (0x27, rnd, -1);
867   emitField(0x0d, 1, isSignedType(insn->sType));
868   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
869   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
870   emitGPR  (0x00, insn->def(0));
871}
872
873void
874CodeEmitterGM107::emitI2I()
875{
876   switch (insn->src(0).getFile()) {
877   case FILE_GPR:
878      emitInsn(0x5ce00000);
879      emitGPR (0x14, insn->src(0));
880      break;
881   case FILE_MEMORY_CONST:
882      emitInsn(0x4ce00000);
883      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
884      break;
885   case FILE_IMMEDIATE:
886      emitInsn(0x38e00000);
887      emitIMMD(0x14, 19, insn->src(0));
888      break;
889   default:
890      assert(!"bad src0 file");
891      break;
892   }
893
894   emitSAT  (0x32);
895   emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
896   emitCC   (0x2f);
897   emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
898   emitField(0x29, 2, insn->subOp);
899   emitField(0x0d, 1, isSignedType(insn->sType));
900   emitField(0x0c, 1, isSignedType(insn->dType));
901   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
902   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
903   emitGPR  (0x00, insn->def(0));
904}
905
906static void
907selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
908{
909   int loc = entry->loc;
910   if (data.force_persample_interp)
911      code[loc + 1] |= 1 << 10;
912   else
913      code[loc + 1] &= ~(1 << 10);
914}
915
916void
917CodeEmitterGM107::emitSEL()
918{
919   switch (insn->src(1).getFile()) {
920   case FILE_GPR:
921      emitInsn(0x5ca00000);
922      emitGPR (0x14, insn->src(1));
923      break;
924   case FILE_MEMORY_CONST:
925      emitInsn(0x4ca00000);
926      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
927      break;
928   case FILE_IMMEDIATE:
929      emitInsn(0x38a00000);
930      emitIMMD(0x14, 19, insn->src(1));
931      break;
932   default:
933      assert(!"bad src1 file");
934      break;
935   }
936
937   emitINV (0x2a, insn->src(2));
938   emitPRED(0x27, insn->src(2));
939   emitGPR (0x08, insn->src(0));
940   emitGPR (0x00, insn->def(0));
941
942   if (insn->subOp == 1) {
943      addInterp(0, 0, selpFlip);
944   }
945}
946
947void
948CodeEmitterGM107::emitSHFL()
949{
950   int type = 0;
951
952   emitInsn (0xef100000);
953
954   switch (insn->src(1).getFile()) {
955   case FILE_GPR:
956      emitGPR(0x14, insn->src(1));
957      break;
958   case FILE_IMMEDIATE:
959      emitIMMD(0x14, 5, insn->src(1));
960      type |= 1;
961      break;
962   default:
963      assert(!"invalid src1 file");
964      break;
965   }
966
967   /*XXX: what is this arg? hardcode immediate for now */
968   emitField(0x22, 13, 0x1c03);
969   type |= 2;
970
971   emitPRED (0x30);
972   emitField(0x1e, 2, insn->subOp);
973   emitField(0x1c, 2, type);
974   emitGPR  (0x08, insn->src(0));
975   emitGPR  (0x00, insn->def(0));
976}
977
978/*******************************************************************************
979 * double
980 ******************************************************************************/
981
982void
983CodeEmitterGM107::emitDADD()
984{
985   switch (insn->src(1).getFile()) {
986   case FILE_GPR:
987      emitInsn(0x5c700000);
988      emitGPR (0x14, insn->src(1));
989      break;
990   case FILE_MEMORY_CONST:
991      emitInsn(0x4c700000);
992      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
993      break;
994   case FILE_IMMEDIATE:
995      emitInsn(0x38700000);
996      emitIMMD(0x14, 19, insn->src(1));
997      break;
998   default:
999      assert(!"bad src1 file");
1000      break;
1001   }
1002   emitABS(0x31, insn->src(1));
1003   emitNEG(0x30, insn->src(0));
1004   emitCC (0x2f);
1005   emitABS(0x2e, insn->src(0));
1006   emitNEG(0x2d, insn->src(1));
1007
1008   if (insn->op == OP_SUB)
1009      code[1] ^= 0x00002000;
1010
1011   emitGPR(0x08, insn->src(0));
1012   emitGPR(0x00, insn->def(0));
1013}
1014
1015void
1016CodeEmitterGM107::emitDMUL()
1017{
1018   switch (insn->src(1).getFile()) {
1019   case FILE_GPR:
1020      emitInsn(0x5c800000);
1021      emitGPR (0x14, insn->src(1));
1022      break;
1023   case FILE_MEMORY_CONST:
1024      emitInsn(0x4c800000);
1025      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1026      break;
1027   case FILE_IMMEDIATE:
1028      emitInsn(0x38800000);
1029      emitIMMD(0x14, 19, insn->src(1));
1030      break;
1031   default:
1032      assert(!"bad src1 file");
1033      break;
1034   }
1035
1036   emitNEG2(0x30, insn->src(0), insn->src(1));
1037   emitCC  (0x2f);
1038   emitRND (0x27);
1039   emitGPR (0x08, insn->src(0));
1040   emitGPR (0x00, insn->def(0));
1041}
1042
1043void
1044CodeEmitterGM107::emitDFMA()
1045{
1046   switch(insn->src(2).getFile()) {
1047   case FILE_GPR:
1048      switch (insn->src(1).getFile()) {
1049      case FILE_GPR:
1050         emitInsn(0x5b700000);
1051         emitGPR (0x14, insn->src(1));
1052         break;
1053      case FILE_MEMORY_CONST:
1054         emitInsn(0x4b700000);
1055         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1056         break;
1057      case FILE_IMMEDIATE:
1058         emitInsn(0x36700000);
1059         emitIMMD(0x14, 19, insn->src(1));
1060         break;
1061      default:
1062         assert(!"bad src1 file");
1063         break;
1064      }
1065      emitGPR (0x27, insn->src(2));
1066      break;
1067   case FILE_MEMORY_CONST:
1068      emitInsn(0x53700000);
1069      emitGPR (0x27, insn->src(1));
1070      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1071      break;
1072   default:
1073      assert(!"bad src2 file");
1074      break;
1075   }
1076
1077   emitRND (0x32);
1078   emitNEG (0x31, insn->src(2));
1079   emitNEG2(0x30, insn->src(0), insn->src(1));
1080   emitCC  (0x2f);
1081   emitGPR (0x08, insn->src(0));
1082   emitGPR (0x00, insn->def(0));
1083}
1084
1085void
1086CodeEmitterGM107::emitDMNMX()
1087{
1088   switch (insn->src(1).getFile()) {
1089   case FILE_GPR:
1090      emitInsn(0x5c500000);
1091      emitGPR (0x14, insn->src(1));
1092      break;
1093   case FILE_MEMORY_CONST:
1094      emitInsn(0x4c500000);
1095      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096      break;
1097   case FILE_IMMEDIATE:
1098      emitInsn(0x38500000);
1099      emitIMMD(0x14, 19, insn->src(1));
1100      break;
1101   default:
1102      assert(!"bad src1 file");
1103      break;
1104   }
1105
1106   emitABS  (0x31, insn->src(1));
1107   emitNEG  (0x30, insn->src(0));
1108   emitCC   (0x2f);
1109   emitABS  (0x2e, insn->src(0));
1110   emitNEG  (0x2d, insn->src(1));
1111   emitField(0x2a, 1, insn->op == OP_MAX);
1112   emitPRED (0x27);
1113   emitGPR  (0x08, insn->src(0));
1114   emitGPR  (0x00, insn->def(0));
1115}
1116
1117void
1118CodeEmitterGM107::emitDSET()
1119{
1120   const CmpInstruction *insn = this->insn->asCmp();
1121
1122   switch (insn->src(1).getFile()) {
1123   case FILE_GPR:
1124      emitInsn(0x59000000);
1125      emitGPR (0x14, insn->src(1));
1126      break;
1127   case FILE_MEMORY_CONST:
1128      emitInsn(0x49000000);
1129      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1130      break;
1131   case FILE_IMMEDIATE:
1132      emitInsn(0x32000000);
1133      emitIMMD(0x14, 19, insn->src(1));
1134      break;
1135   default:
1136      assert(!"bad src1 file");
1137      break;
1138   }
1139
1140   if (insn->op != OP_SET) {
1141      switch (insn->op) {
1142      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1143      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1144      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1145      default:
1146         assert(!"invalid set op");
1147         break;
1148      }
1149      emitPRED(0x27, insn->src(2));
1150   } else {
1151      emitPRED(0x27);
1152   }
1153
1154   emitABS  (0x36, insn->src(0));
1155   emitNEG  (0x35, insn->src(1));
1156   emitField(0x34, 1, insn->dType == TYPE_F32);
1157   emitCond4(0x30, insn->setCond);
1158   emitCC   (0x2f);
1159   emitABS  (0x2c, insn->src(1));
1160   emitNEG  (0x2b, insn->src(0));
1161   emitGPR  (0x08, insn->src(0));
1162   emitGPR  (0x00, insn->def(0));
1163}
1164
1165void
1166CodeEmitterGM107::emitDSETP()
1167{
1168   const CmpInstruction *insn = this->insn->asCmp();
1169
1170   switch (insn->src(1).getFile()) {
1171   case FILE_GPR:
1172      emitInsn(0x5b800000);
1173      emitGPR (0x14, insn->src(1));
1174      break;
1175   case FILE_MEMORY_CONST:
1176      emitInsn(0x4b800000);
1177      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1178      break;
1179   case FILE_IMMEDIATE:
1180      emitInsn(0x36800000);
1181      emitIMMD(0x14, 19, insn->src(1));
1182      break;
1183   default:
1184      assert(!"bad src1 file");
1185      break;
1186   }
1187
1188   if (insn->op != OP_SET) {
1189      switch (insn->op) {
1190      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1191      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1192      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1193      default:
1194         assert(!"invalid set op");
1195         break;
1196      }
1197      emitPRED(0x27, insn->src(2));
1198   } else {
1199      emitPRED(0x27);
1200   }
1201
1202   emitCond4(0x30, insn->setCond);
1203   emitABS  (0x2c, insn->src(1));
1204   emitNEG  (0x2b, insn->src(0));
1205   emitGPR  (0x08, insn->src(0));
1206   emitABS  (0x07, insn->src(0));
1207   emitNEG  (0x06, insn->src(1));
1208   emitPRED (0x03, insn->def(0));
1209   if (insn->defExists(1))
1210      emitPRED(0x00, insn->def(1));
1211   else
1212      emitPRED(0x00);
1213}
1214
1215/*******************************************************************************
1216 * float
1217 ******************************************************************************/
1218
1219void
1220CodeEmitterGM107::emitFADD()
1221{
1222   if (!longIMMD(insn->src(1))) {
1223      switch (insn->src(1).getFile()) {
1224      case FILE_GPR:
1225         emitInsn(0x5c580000);
1226         emitGPR (0x14, insn->src(1));
1227         break;
1228      case FILE_MEMORY_CONST:
1229         emitInsn(0x4c580000);
1230         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1231         break;
1232      case FILE_IMMEDIATE:
1233         emitInsn(0x38580000);
1234         emitIMMD(0x14, 19, insn->src(1));
1235         break;
1236      default:
1237         assert(!"bad src1 file");
1238         break;
1239      }
1240      emitSAT(0x32);
1241      emitABS(0x31, insn->src(1));
1242      emitNEG(0x30, insn->src(0));
1243      emitCC (0x2f);
1244      emitABS(0x2e, insn->src(0));
1245      emitNEG(0x2d, insn->src(1));
1246      emitFMZ(0x2c, 1);
1247
1248      if (insn->op == OP_SUB)
1249         code[1] ^= 0x00002000;
1250   } else {
1251      emitInsn(0x08000000);
1252      emitABS(0x39, insn->src(1));
1253      emitNEG(0x38, insn->src(0));
1254      emitFMZ(0x37, 1);
1255      emitABS(0x36, insn->src(0));
1256      emitNEG(0x35, insn->src(1));
1257      emitCC  (0x34);
1258      emitIMMD(0x14, 32, insn->src(1));
1259
1260      if (insn->op == OP_SUB)
1261         code[1] ^= 0x00080000;
1262   }
1263
1264   emitGPR(0x08, insn->src(0));
1265   emitGPR(0x00, insn->def(0));
1266}
1267
1268void
1269CodeEmitterGM107::emitFMUL()
1270{
1271   if (!longIMMD(insn->src(1))) {
1272      switch (insn->src(1).getFile()) {
1273      case FILE_GPR:
1274         emitInsn(0x5c680000);
1275         emitGPR (0x14, insn->src(1));
1276         break;
1277      case FILE_MEMORY_CONST:
1278         emitInsn(0x4c680000);
1279         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1280         break;
1281      case FILE_IMMEDIATE:
1282         emitInsn(0x38680000);
1283         emitIMMD(0x14, 19, insn->src(1));
1284         break;
1285      default:
1286         assert(!"bad src1 file");
1287         break;
1288      }
1289      emitSAT (0x32);
1290      emitNEG2(0x30, insn->src(0), insn->src(1));
1291      emitCC  (0x2f);
1292      emitFMZ (0x2c, 2);
1293      emitPDIV(0x29);
1294      emitRND (0x27);
1295   } else {
1296      emitInsn(0x1e000000);
1297      emitSAT (0x37);
1298      emitFMZ (0x35, 2);
1299      emitCC  (0x34);
1300      emitIMMD(0x14, 32, insn->src(1));
1301      if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1302         code[1] ^= 0x00080000; /* flip immd sign bit */
1303   }
1304
1305   emitGPR(0x08, insn->src(0));
1306   emitGPR(0x00, insn->def(0));
1307}
1308
1309void
1310CodeEmitterGM107::emitFFMA()
1311{
1312   /*XXX: ffma32i exists, but not using it as third src overlaps dst */
1313   switch(insn->src(2).getFile()) {
1314   case FILE_GPR:
1315      switch (insn->src(1).getFile()) {
1316      case FILE_GPR:
1317         emitInsn(0x59800000);
1318         emitGPR (0x14, insn->src(1));
1319         break;
1320      case FILE_MEMORY_CONST:
1321         emitInsn(0x49800000);
1322         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1323         break;
1324      case FILE_IMMEDIATE:
1325         emitInsn(0x32800000);
1326         emitIMMD(0x14, 19, insn->src(1));
1327         break;
1328      default:
1329         assert(!"bad src1 file");
1330         break;
1331      }
1332      emitGPR (0x27, insn->src(2));
1333      break;
1334   case FILE_MEMORY_CONST:
1335      emitInsn(0x51800000);
1336      emitGPR (0x27, insn->src(1));
1337      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1338      break;
1339   default:
1340      assert(!"bad src2 file");
1341      break;
1342   }
1343   emitRND (0x33);
1344   emitSAT (0x32);
1345   emitNEG (0x31, insn->src(2));
1346   emitNEG2(0x30, insn->src(0), insn->src(1));
1347   emitCC  (0x2f);
1348
1349   emitFMZ(0x35, 2);
1350   emitGPR(0x08, insn->src(0));
1351   emitGPR(0x00, insn->def(0));
1352}
1353
1354void
1355CodeEmitterGM107::emitMUFU()
1356{
1357   int mufu = 0;
1358
1359   switch (insn->op) {
1360   case OP_COS: mufu = 0; break;
1361   case OP_SIN: mufu = 1; break;
1362   case OP_EX2: mufu = 2; break;
1363   case OP_LG2: mufu = 3; break;
1364   case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1365   case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1366   default:
1367      assert(!"invalid mufu");
1368      break;
1369   }
1370
1371   emitInsn (0x50800000);
1372   emitSAT  (0x32);
1373   emitNEG  (0x30, insn->src(0));
1374   emitABS  (0x2e, insn->src(0));
1375   emitField(0x14, 3, mufu);
1376   emitGPR  (0x08, insn->src(0));
1377   emitGPR  (0x00, insn->def(0));
1378}
1379
1380void
1381CodeEmitterGM107::emitFMNMX()
1382{
1383   switch (insn->src(1).getFile()) {
1384   case FILE_GPR:
1385      emitInsn(0x5c600000);
1386      emitGPR (0x14, insn->src(1));
1387      break;
1388   case FILE_MEMORY_CONST:
1389      emitInsn(0x4c600000);
1390      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1391      break;
1392   case FILE_IMMEDIATE:
1393      emitInsn(0x38600000);
1394      emitIMMD(0x14, 19, insn->src(1));
1395      break;
1396   default:
1397      assert(!"bad src1 file");
1398      break;
1399   }
1400
1401   emitField(0x2a, 1, insn->op == OP_MAX);
1402   emitPRED (0x27);
1403
1404   emitABS(0x31, insn->src(1));
1405   emitNEG(0x30, insn->src(0));
1406   emitCC (0x2f);
1407   emitABS(0x2e, insn->src(0));
1408   emitNEG(0x2d, insn->src(1));
1409   emitFMZ(0x2c, 1);
1410   emitGPR(0x08, insn->src(0));
1411   emitGPR(0x00, insn->def(0));
1412}
1413
1414void
1415CodeEmitterGM107::emitRRO()
1416{
1417   switch (insn->src(0).getFile()) {
1418   case FILE_GPR:
1419      emitInsn(0x5c900000);
1420      emitGPR (0x14, insn->src(0));
1421      break;
1422   case FILE_MEMORY_CONST:
1423      emitInsn(0x4c900000);
1424      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1425      break;
1426   case FILE_IMMEDIATE:
1427      emitInsn(0x38900000);
1428      emitIMMD(0x14, 19, insn->src(0));
1429      break;
1430   default:
1431      assert(!"bad src file");
1432      break;
1433   }
1434
1435   emitABS  (0x31, insn->src(0));
1436   emitNEG  (0x2d, insn->src(0));
1437   emitField(0x27, 1, insn->op == OP_PREEX2);
1438   emitGPR  (0x00, insn->def(0));
1439}
1440
1441void
1442CodeEmitterGM107::emitFCMP()
1443{
1444   const CmpInstruction *insn = this->insn->asCmp();
1445   CondCode cc = insn->setCond;
1446
1447   if (insn->src(2).mod.neg())
1448      cc = reverseCondCode(cc);
1449
1450   switch(insn->src(2).getFile()) {
1451   case FILE_GPR:
1452      switch (insn->src(1).getFile()) {
1453      case FILE_GPR:
1454         emitInsn(0x5ba00000);
1455         emitGPR (0x14, insn->src(1));
1456         break;
1457      case FILE_MEMORY_CONST:
1458         emitInsn(0x4ba00000);
1459         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1460         break;
1461      case FILE_IMMEDIATE:
1462         emitInsn(0x36a00000);
1463         emitIMMD(0x14, 19, insn->src(1));
1464         break;
1465      default:
1466         assert(!"bad src1 file");
1467         break;
1468      }
1469      emitGPR (0x27, insn->src(2));
1470      break;
1471   case FILE_MEMORY_CONST:
1472      emitInsn(0x53a00000);
1473      emitGPR (0x27, insn->src(1));
1474      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1475      break;
1476   default:
1477      assert(!"bad src2 file");
1478      break;
1479   }
1480
1481   emitCond4(0x30, cc);
1482   emitFMZ  (0x2f, 1);
1483   emitGPR  (0x08, insn->src(0));
1484   emitGPR  (0x00, insn->def(0));
1485}
1486
1487void
1488CodeEmitterGM107::emitFSET()
1489{
1490   const CmpInstruction *insn = this->insn->asCmp();
1491
1492   switch (insn->src(1).getFile()) {
1493   case FILE_GPR:
1494      emitInsn(0x58000000);
1495      emitGPR (0x14, insn->src(1));
1496      break;
1497   case FILE_MEMORY_CONST:
1498      emitInsn(0x48000000);
1499      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1500      break;
1501   case FILE_IMMEDIATE:
1502      emitInsn(0x30000000);
1503      emitIMMD(0x14, 19, insn->src(1));
1504      break;
1505   default:
1506      assert(!"bad src1 file");
1507      break;
1508   }
1509
1510   if (insn->op != OP_SET) {
1511      switch (insn->op) {
1512      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1513      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1514      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1515      default:
1516         assert(!"invalid set op");
1517         break;
1518      }
1519      emitPRED(0x27, insn->src(2));
1520   } else {
1521      emitPRED(0x27);
1522   }
1523
1524   emitFMZ  (0x37, 1);
1525   emitABS  (0x36, insn->src(0));
1526   emitNEG  (0x35, insn->src(1));
1527   emitField(0x34, 1, insn->dType == TYPE_F32);
1528   emitCond4(0x30, insn->setCond);
1529   emitCC   (0x2f);
1530   emitABS  (0x2c, insn->src(1));
1531   emitNEG  (0x2b, insn->src(0));
1532   emitGPR  (0x08, insn->src(0));
1533   emitGPR  (0x00, insn->def(0));
1534}
1535
1536void
1537CodeEmitterGM107::emitFSETP()
1538{
1539   const CmpInstruction *insn = this->insn->asCmp();
1540
1541   switch (insn->src(1).getFile()) {
1542   case FILE_GPR:
1543      emitInsn(0x5bb00000);
1544      emitGPR (0x14, insn->src(1));
1545      break;
1546   case FILE_MEMORY_CONST:
1547      emitInsn(0x4bb00000);
1548      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1549      break;
1550   case FILE_IMMEDIATE:
1551      emitInsn(0x36b00000);
1552      emitIMMD(0x14, 19, insn->src(1));
1553      break;
1554   default:
1555      assert(!"bad src1 file");
1556      break;
1557   }
1558
1559   if (insn->op != OP_SET) {
1560      switch (insn->op) {
1561      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1562      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1563      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1564      default:
1565         assert(!"invalid set op");
1566         break;
1567      }
1568      emitPRED(0x27, insn->src(2));
1569   } else {
1570      emitPRED(0x27);
1571   }
1572
1573   emitCond4(0x30, insn->setCond);
1574   emitFMZ  (0x2f, 1);
1575   emitABS  (0x2c, insn->src(1));
1576   emitNEG  (0x2b, insn->src(0));
1577   emitGPR  (0x08, insn->src(0));
1578   emitABS  (0x07, insn->src(0));
1579   emitNEG  (0x06, insn->src(1));
1580   emitPRED (0x03, insn->def(0));
1581   if (insn->defExists(1))
1582      emitPRED(0x00, insn->def(1));
1583   else
1584      emitPRED(0x00);
1585}
1586
1587void
1588CodeEmitterGM107::emitFSWZADD()
1589{
1590   emitInsn (0x50f80000);
1591   emitCC   (0x2f);
1592   emitFMZ  (0x2c, 1);
1593   emitRND  (0x27);
1594   emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1595   emitField(0x1c, 8, insn->subOp);
1596   if (insn->predSrc != 1)
1597      emitGPR  (0x14, insn->src(1));
1598   else
1599      emitGPR  (0x14);
1600   emitGPR  (0x08, insn->src(0));
1601   emitGPR  (0x00, insn->def(0));
1602}
1603
1604/*******************************************************************************
1605 * integer
1606 ******************************************************************************/
1607
1608void
1609CodeEmitterGM107::emitLOP()
1610{
1611   int lop = 0;
1612
1613   switch (insn->op) {
1614   case OP_AND: lop = 0; break;
1615   case OP_OR : lop = 1; break;
1616   case OP_XOR: lop = 2; break;
1617   default:
1618      assert(!"invalid lop");
1619      break;
1620   }
1621
1622   if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1623      switch (insn->src(1).getFile()) {
1624      case FILE_GPR:
1625         emitInsn(0x5c400000);
1626         emitGPR (0x14, insn->src(1));
1627         break;
1628      case FILE_MEMORY_CONST:
1629         emitInsn(0x4c400000);
1630         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1631         break;
1632      case FILE_IMMEDIATE:
1633         emitInsn(0x38400000);
1634         emitIMMD(0x14, 19, insn->src(1));
1635         break;
1636      default:
1637         assert(!"bad src1 file");
1638         break;
1639      }
1640      emitPRED (0x30);
1641      emitCC   (0x2f);
1642      emitX    (0x2b);
1643      emitField(0x29, 2, lop);
1644      emitINV  (0x28, insn->src(1));
1645      emitINV  (0x27, insn->src(0));
1646   } else {
1647      emitInsn (0x04000000);
1648      emitX    (0x39);
1649      emitINV  (0x38, insn->src(1));
1650      emitINV  (0x37, insn->src(0));
1651      emitField(0x35, 2, lop);
1652      emitCC   (0x34);
1653      emitIMMD (0x14, 32, insn->src(1));
1654   }
1655
1656   emitGPR  (0x08, insn->src(0));
1657   emitGPR  (0x00, insn->def(0));
1658}
1659
1660/* special-case of emitLOP(): lop pass_b dst 0 ~src */
1661void
1662CodeEmitterGM107::emitNOT()
1663{
1664   if (!longIMMD(insn->src(0))) {
1665      switch (insn->src(0).getFile()) {
1666      case FILE_GPR:
1667         emitInsn(0x5c400700);
1668         emitGPR (0x14, insn->src(0));
1669         break;
1670      case FILE_MEMORY_CONST:
1671         emitInsn(0x4c400700);
1672         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1673         break;
1674      case FILE_IMMEDIATE:
1675         emitInsn(0x38400700);
1676         emitIMMD(0x14, 19, insn->src(0));
1677         break;
1678      default:
1679         assert(!"bad src1 file");
1680         break;
1681      }
1682      emitPRED (0x30);
1683   } else {
1684      emitInsn (0x05600000);
1685      emitIMMD (0x14, 32, insn->src(1));
1686   }
1687
1688   emitGPR(0x08);
1689   emitGPR(0x00, insn->def(0));
1690}
1691
1692void
1693CodeEmitterGM107::emitIADD()
1694{
1695   if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1696      switch (insn->src(1).getFile()) {
1697      case FILE_GPR:
1698         emitInsn(0x5c100000);
1699         emitGPR (0x14, insn->src(1));
1700         break;
1701      case FILE_MEMORY_CONST:
1702         emitInsn(0x4c100000);
1703         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1704         break;
1705      case FILE_IMMEDIATE:
1706         emitInsn(0x38100000);
1707         emitIMMD(0x14, 19, insn->src(1));
1708         break;
1709      default:
1710         assert(!"bad src1 file");
1711         break;
1712      }
1713      emitSAT(0x32);
1714      emitNEG(0x31, insn->src(0));
1715      emitNEG(0x30, insn->src(1));
1716      emitCC (0x2f);
1717      emitX  (0x2b);
1718   } else {
1719      emitInsn(0x1c000000);
1720      emitNEG (0x38, insn->src(0));
1721      emitSAT (0x36);
1722      emitX   (0x35);
1723      emitCC  (0x34);
1724      emitIMMD(0x14, 32, insn->src(1));
1725   }
1726
1727   if (insn->op == OP_SUB)
1728      code[1] ^= 0x00010000;
1729
1730   emitGPR(0x08, insn->src(0));
1731   emitGPR(0x00, insn->def(0));
1732}
1733
1734void
1735CodeEmitterGM107::emitIMUL()
1736{
1737   if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1738      switch (insn->src(1).getFile()) {
1739      case FILE_GPR:
1740         emitInsn(0x5c380000);
1741         emitGPR (0x14, insn->src(1));
1742         break;
1743      case FILE_MEMORY_CONST:
1744         emitInsn(0x4c380000);
1745         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1746         break;
1747      case FILE_IMMEDIATE:
1748         emitInsn(0x38380000);
1749         emitIMMD(0x14, 19, insn->src(1));
1750         break;
1751      default:
1752         assert(!"bad src1 file");
1753         break;
1754      }
1755      emitCC   (0x2f);
1756      emitField(0x29, 1, isSignedType(insn->sType));
1757      emitField(0x28, 1, isSignedType(insn->dType));
1758      emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1759   } else {
1760      emitInsn (0x1f000000);
1761      emitField(0x37, 1, isSignedType(insn->sType));
1762      emitField(0x36, 1, isSignedType(insn->dType));
1763      emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1764      emitCC   (0x34);
1765      emitIMMD (0x14, 32, insn->src(1));
1766   }
1767
1768   emitGPR(0x08, insn->src(0));
1769   emitGPR(0x00, insn->def(0));
1770}
1771
1772void
1773CodeEmitterGM107::emitIMAD()
1774{
1775   /*XXX: imad32i exists, but not using it as third src overlaps dst */
1776   switch(insn->src(2).getFile()) {
1777   case FILE_GPR:
1778      switch (insn->src(1).getFile()) {
1779      case FILE_GPR:
1780         emitInsn(0x5a000000);
1781         emitGPR (0x14, insn->src(1));
1782         break;
1783      case FILE_MEMORY_CONST:
1784         emitInsn(0x4a000000);
1785         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1786         break;
1787      case FILE_IMMEDIATE:
1788         emitInsn(0x34000000);
1789         emitIMMD(0x14, 19, insn->src(1));
1790         break;
1791      default:
1792         assert(!"bad src1 file");
1793         break;
1794      }
1795      emitGPR (0x27, insn->src(2));
1796      break;
1797   case FILE_MEMORY_CONST:
1798      emitInsn(0x52000000);
1799      emitGPR (0x27, insn->src(1));
1800      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1801      break;
1802   default:
1803      assert(!"bad src2 file");
1804      break;
1805   }
1806
1807   emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1808   emitField(0x35, 1, isSignedType(insn->sType));
1809   emitNEG  (0x34, insn->src(2));
1810   emitNEG2 (0x33, insn->src(0), insn->src(1));
1811   emitSAT  (0x32);
1812   emitX    (0x31);
1813   emitField(0x30, 1, isSignedType(insn->dType));
1814   emitCC   (0x2f);
1815   emitGPR  (0x08, insn->src(0));
1816   emitGPR  (0x00, insn->def(0));
1817}
1818
1819void
1820CodeEmitterGM107::emitISCADD()
1821{
1822   switch (insn->src(2).getFile()) {
1823   case FILE_GPR:
1824      emitInsn(0x5c180000);
1825      emitGPR (0x14, insn->src(2));
1826      break;
1827   case FILE_MEMORY_CONST:
1828      emitInsn(0x4c180000);
1829      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1830      break;
1831   case FILE_IMMEDIATE:
1832      emitInsn(0x38180000);
1833      emitIMMD(0x14, 19, insn->src(2));
1834      break;
1835   default:
1836      assert(!"bad src1 file");
1837      break;
1838   }
1839   emitNEG (0x31, insn->src(0));
1840   emitNEG (0x30, insn->src(2));
1841   emitCC  (0x2f);
1842   emitIMMD(0x27, 5, insn->src(1));
1843   emitGPR (0x08, insn->src(0));
1844   emitGPR (0x00, insn->def(0));
1845}
1846
1847void
1848CodeEmitterGM107::emitIMNMX()
1849{
1850   switch (insn->src(1).getFile()) {
1851   case FILE_GPR:
1852      emitInsn(0x5c200000);
1853      emitGPR (0x14, insn->src(1));
1854      break;
1855   case FILE_MEMORY_CONST:
1856      emitInsn(0x4c200000);
1857      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1858      break;
1859   case FILE_IMMEDIATE:
1860      emitInsn(0x38200000);
1861      emitIMMD(0x14, 19, insn->src(1));
1862      break;
1863   default:
1864      assert(!"bad src1 file");
1865      break;
1866   }
1867
1868   emitField(0x30, 1, isSignedType(insn->dType));
1869   emitCC   (0x2f);
1870   emitField(0x2a, 1, insn->op == OP_MAX);
1871   emitPRED (0x27);
1872   emitGPR  (0x08, insn->src(0));
1873   emitGPR  (0x00, insn->def(0));
1874}
1875
1876void
1877CodeEmitterGM107::emitICMP()
1878{
1879   const CmpInstruction *insn = this->insn->asCmp();
1880   CondCode cc = insn->setCond;
1881
1882   if (insn->src(2).mod.neg())
1883      cc = reverseCondCode(cc);
1884
1885   switch(insn->src(2).getFile()) {
1886   case FILE_GPR:
1887      switch (insn->src(1).getFile()) {
1888      case FILE_GPR:
1889         emitInsn(0x5b400000);
1890         emitGPR (0x14, insn->src(1));
1891         break;
1892      case FILE_MEMORY_CONST:
1893         emitInsn(0x4b400000);
1894         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1895         break;
1896      case FILE_IMMEDIATE:
1897         emitInsn(0x36400000);
1898         emitIMMD(0x14, 19, insn->src(1));
1899         break;
1900      default:
1901         assert(!"bad src1 file");
1902         break;
1903      }
1904      emitGPR (0x27, insn->src(2));
1905      break;
1906   case FILE_MEMORY_CONST:
1907      emitInsn(0x53400000);
1908      emitGPR (0x27, insn->src(1));
1909      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1910      break;
1911   default:
1912      assert(!"bad src2 file");
1913      break;
1914   }
1915
1916   emitCond3(0x31, cc);
1917   emitField(0x30, 1, isSignedType(insn->sType));
1918   emitGPR  (0x08, insn->src(0));
1919   emitGPR  (0x00, insn->def(0));
1920}
1921
1922void
1923CodeEmitterGM107::emitISET()
1924{
1925   const CmpInstruction *insn = this->insn->asCmp();
1926
1927   switch (insn->src(1).getFile()) {
1928   case FILE_GPR:
1929      emitInsn(0x5b500000);
1930      emitGPR (0x14, insn->src(1));
1931      break;
1932   case FILE_MEMORY_CONST:
1933      emitInsn(0x4b500000);
1934      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1935      break;
1936   case FILE_IMMEDIATE:
1937      emitInsn(0x36500000);
1938      emitIMMD(0x14, 19, insn->src(1));
1939      break;
1940   default:
1941      assert(!"bad src1 file");
1942      break;
1943   }
1944
1945   if (insn->op != OP_SET) {
1946      switch (insn->op) {
1947      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1948      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1949      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1950      default:
1951         assert(!"invalid set op");
1952         break;
1953      }
1954      emitPRED(0x27, insn->src(2));
1955   } else {
1956      emitPRED(0x27);
1957   }
1958
1959   emitCond3(0x31, insn->setCond);
1960   emitField(0x30, 1, isSignedType(insn->sType));
1961   emitCC   (0x2f);
1962   emitField(0x2c, 1, insn->dType == TYPE_F32);
1963   emitX    (0x2b);
1964   emitGPR  (0x08, insn->src(0));
1965   emitGPR  (0x00, insn->def(0));
1966}
1967
1968void
1969CodeEmitterGM107::emitISETP()
1970{
1971   const CmpInstruction *insn = this->insn->asCmp();
1972
1973   switch (insn->src(1).getFile()) {
1974   case FILE_GPR:
1975      emitInsn(0x5b600000);
1976      emitGPR (0x14, insn->src(1));
1977      break;
1978   case FILE_MEMORY_CONST:
1979      emitInsn(0x4b600000);
1980      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1981      break;
1982   case FILE_IMMEDIATE:
1983      emitInsn(0x36600000);
1984      emitIMMD(0x14, 19, insn->src(1));
1985      break;
1986   default:
1987      assert(!"bad src1 file");
1988      break;
1989   }
1990
1991   if (insn->op != OP_SET) {
1992      switch (insn->op) {
1993      case OP_SET_AND: emitField(0x2d, 2, 0); break;
1994      case OP_SET_OR : emitField(0x2d, 2, 1); break;
1995      case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1996      default:
1997         assert(!"invalid set op");
1998         break;
1999      }
2000      emitPRED(0x27, insn->src(2));
2001   } else {
2002      emitPRED(0x27);
2003   }
2004
2005   emitCond3(0x31, insn->setCond);
2006   emitField(0x30, 1, isSignedType(insn->sType));
2007   emitX    (0x2b);
2008   emitGPR  (0x08, insn->src(0));
2009   emitPRED (0x03, insn->def(0));
2010   if (insn->defExists(1))
2011      emitPRED(0x00, insn->def(1));
2012   else
2013      emitPRED(0x00);
2014}
2015
2016void
2017CodeEmitterGM107::emitSHL()
2018{
2019   switch (insn->src(1).getFile()) {
2020   case FILE_GPR:
2021      emitInsn(0x5c480000);
2022      emitGPR (0x14, insn->src(1));
2023      break;
2024   case FILE_MEMORY_CONST:
2025      emitInsn(0x4c480000);
2026      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2027      break;
2028   case FILE_IMMEDIATE:
2029      emitInsn(0x38480000);
2030      emitIMMD(0x14, 19, insn->src(1));
2031      break;
2032   default:
2033      assert(!"bad src1 file");
2034      break;
2035   }
2036
2037   emitCC   (0x2f);
2038   emitX    (0x2b);
2039   emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2040   emitGPR  (0x08, insn->src(0));
2041   emitGPR  (0x00, insn->def(0));
2042}
2043
2044void
2045CodeEmitterGM107::emitSHR()
2046{
2047   switch (insn->src(1).getFile()) {
2048   case FILE_GPR:
2049      emitInsn(0x5c280000);
2050      emitGPR (0x14, insn->src(1));
2051      break;
2052   case FILE_MEMORY_CONST:
2053      emitInsn(0x4c280000);
2054      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2055      break;
2056   case FILE_IMMEDIATE:
2057      emitInsn(0x38280000);
2058      emitIMMD(0x14, 19, insn->src(1));
2059      break;
2060   default:
2061      assert(!"bad src1 file");
2062      break;
2063   }
2064
2065   emitField(0x30, 1, isSignedType(insn->dType));
2066   emitCC   (0x2f);
2067   emitX    (0x2c);
2068   emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2069   emitGPR  (0x08, insn->src(0));
2070   emitGPR  (0x00, insn->def(0));
2071}
2072
2073void
2074CodeEmitterGM107::emitPOPC()
2075{
2076   switch (insn->src(0).getFile()) {
2077   case FILE_GPR:
2078      emitInsn(0x5c080000);
2079      emitGPR (0x14, insn->src(0));
2080      break;
2081   case FILE_MEMORY_CONST:
2082      emitInsn(0x4c080000);
2083      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2084      break;
2085   case FILE_IMMEDIATE:
2086      emitInsn(0x38080000);
2087      emitIMMD(0x14, 19, insn->src(0));
2088      break;
2089   default:
2090      assert(!"bad src1 file");
2091      break;
2092   }
2093
2094   emitINV(0x28, insn->src(0));
2095   emitGPR(0x00, insn->def(0));
2096}
2097
2098void
2099CodeEmitterGM107::emitBFI()
2100{
2101   switch(insn->src(2).getFile()) {
2102   case FILE_GPR:
2103      switch (insn->src(1).getFile()) {
2104      case FILE_GPR:
2105         emitInsn(0x5bf00000);
2106         emitGPR (0x14, insn->src(1));
2107         break;
2108      case FILE_MEMORY_CONST:
2109         emitInsn(0x4bf00000);
2110         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2111         break;
2112      case FILE_IMMEDIATE:
2113         emitInsn(0x36f00000);
2114         emitIMMD(0x14, 19, insn->src(1));
2115         break;
2116      default:
2117         assert(!"bad src1 file");
2118         break;
2119      }
2120      emitGPR (0x27, insn->src(2));
2121      break;
2122   case FILE_MEMORY_CONST:
2123      emitInsn(0x53f00000);
2124      emitGPR (0x27, insn->src(1));
2125      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2126      break;
2127   default:
2128      assert(!"bad src2 file");
2129      break;
2130   }
2131
2132   emitCC   (0x2f);
2133   emitGPR  (0x08, insn->src(0));
2134   emitGPR  (0x00, insn->def(0));
2135}
2136
2137void
2138CodeEmitterGM107::emitBFE()
2139{
2140   switch (insn->src(1).getFile()) {
2141   case FILE_GPR:
2142      emitInsn(0x5c000000);
2143      emitGPR (0x14, insn->src(1));
2144      break;
2145   case FILE_MEMORY_CONST:
2146      emitInsn(0x4c000000);
2147      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2148      break;
2149   case FILE_IMMEDIATE:
2150      emitInsn(0x38000000);
2151      emitIMMD(0x14, 19, insn->src(1));
2152      break;
2153   default:
2154      assert(!"bad src1 file");
2155      break;
2156   }
2157
2158   emitField(0x30, 1, isSignedType(insn->dType));
2159   emitCC   (0x2f);
2160   emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2161   emitGPR  (0x08, insn->src(0));
2162   emitGPR  (0x00, insn->def(0));
2163}
2164
2165void
2166CodeEmitterGM107::emitFLO()
2167{
2168   switch (insn->src(0).getFile()) {
2169   case FILE_GPR:
2170      emitInsn(0x5c300000);
2171      emitGPR (0x14, insn->src(0));
2172      break;
2173   case FILE_MEMORY_CONST:
2174      emitInsn(0x4c300000);
2175      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2176      break;
2177   case FILE_IMMEDIATE:
2178      emitInsn(0x38300000);
2179      emitIMMD(0x14, 19, insn->src(0));
2180      break;
2181   default:
2182      assert(!"bad src1 file");
2183      break;
2184   }
2185
2186   emitField(0x30, 1, isSignedType(insn->dType));
2187   emitCC   (0x2f);
2188   emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2189   emitINV  (0x28, insn->src(0));
2190   emitGPR  (0x00, insn->def(0));
2191}
2192
2193/*******************************************************************************
2194 * memory
2195 ******************************************************************************/
2196
2197void
2198CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2199{
2200   int data = 0;
2201
2202   switch (typeSizeof(type)) {
2203   case  1: data = isSignedType(type) ? 1 : 0; break;
2204   case  2: data = isSignedType(type) ? 3 : 2; break;
2205   case  4: data = 4; break;
2206   case  8: data = 5; break;
2207   case 16: data = 6; break;
2208   default:
2209      assert(!"bad type");
2210      break;
2211   }
2212
2213   emitField(pos, 3, data);
2214}
2215
2216void
2217CodeEmitterGM107::emitLDSTc(int pos)
2218{
2219   int mode = 0;
2220
2221   switch (insn->cache) {
2222   case CACHE_CA: mode = 0; break;
2223   case CACHE_CG: mode = 1; break;
2224   case CACHE_CS: mode = 2; break;
2225   case CACHE_CV: mode = 3; break;
2226   default:
2227      assert(!"invalid caching mode");
2228      break;
2229   }
2230
2231   emitField(pos, 2, mode);
2232}
2233
2234void
2235CodeEmitterGM107::emitLDC()
2236{
2237   emitInsn (0xef900000);
2238   emitLDSTs(0x30, insn->dType);
2239   emitField(0x2c, 2, insn->subOp);
2240   emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2241   emitGPR  (0x00, insn->def(0));
2242}
2243
2244void
2245CodeEmitterGM107::emitLDL()
2246{
2247   emitInsn (0xef400000);
2248   emitLDSTs(0x30, insn->dType);
2249   emitLDSTc(0x2c);
2250   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2251   emitGPR  (0x00, insn->def(0));
2252}
2253
2254void
2255CodeEmitterGM107::emitLDS()
2256{
2257   emitInsn (0xef480000);
2258   emitLDSTs(0x30, insn->dType);
2259   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2260   emitGPR  (0x00, insn->def(0));
2261}
2262
2263void
2264CodeEmitterGM107::emitLD()
2265{
2266   emitInsn (0x80000000);
2267   emitPRED (0x3a);
2268   emitLDSTc(0x38);
2269   emitLDSTs(0x35, insn->dType);
2270   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2271   emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2272   emitGPR  (0x00, insn->def(0));
2273}
2274
2275void
2276CodeEmitterGM107::emitSTL()
2277{
2278   emitInsn (0xef500000);
2279   emitLDSTs(0x30, insn->dType);
2280   emitLDSTc(0x2c);
2281   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2282   emitGPR  (0x00, insn->src(1));
2283}
2284
2285void
2286CodeEmitterGM107::emitSTS()
2287{
2288   emitInsn (0xef580000);
2289   emitLDSTs(0x30, insn->dType);
2290   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2291   emitGPR  (0x00, insn->src(1));
2292}
2293
2294void
2295CodeEmitterGM107::emitST()
2296{
2297   emitInsn (0xa0000000);
2298   emitPRED (0x3a);
2299   emitLDSTc(0x38);
2300   emitLDSTs(0x35, insn->dType);
2301   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2302   emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2303   emitGPR  (0x00, insn->src(1));
2304}
2305
2306void
2307CodeEmitterGM107::emitALD()
2308{
2309   emitInsn (0xefd80000);
2310   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2311   emitGPR  (0x27, insn->src(0).getIndirect(1));
2312   emitO    (0x20);
2313   emitP    (0x1f);
2314   emitADDR (0x08, 20, 10, 0, insn->src(0));
2315   emitGPR  (0x00, insn->def(0));
2316}
2317
2318void
2319CodeEmitterGM107::emitAST()
2320{
2321   emitInsn (0xeff00000);
2322   emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2323   emitGPR  (0x27, insn->src(0).getIndirect(1));
2324   emitP    (0x1f);
2325   emitADDR (0x08, 20, 10, 0, insn->src(0));
2326   emitGPR  (0x00, insn->src(1));
2327}
2328
2329void
2330CodeEmitterGM107::emitISBERD()
2331{
2332   emitInsn(0xefd00000);
2333   emitGPR (0x08, insn->src(0));
2334   emitGPR (0x00, insn->def(0));
2335}
2336
2337void
2338CodeEmitterGM107::emitAL2P()
2339{
2340   emitInsn (0xefa00000);
2341   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2342   emitPRED (0x2c);
2343   emitO    (0x20);
2344   emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2345   emitGPR  (0x08, insn->src(0).getIndirect(0));
2346   emitGPR  (0x00, insn->def(0));
2347}
2348
2349static void
2350interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2351{
2352   int ipa = entry->ipa;
2353   int reg = entry->reg;
2354   int loc = entry->loc;
2355
2356   if (data.flatshade &&
2357       (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2358      ipa = NV50_IR_INTERP_FLAT;
2359      reg = 0xff;
2360   } else if (data.force_persample_interp &&
2361              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2362              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2363      ipa |= NV50_IR_INTERP_CENTROID;
2364   }
2365   code[loc + 1] &= ~(0xf << 0x14);
2366   code[loc + 1] |= (ipa & 0x3) << 0x16;
2367   code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2368   code[loc + 0] &= ~(0xff << 0x14);
2369   code[loc + 0] |= reg << 0x14;
2370}
2371
2372void
2373CodeEmitterGM107::emitIPA()
2374{
2375   int ipam = 0, ipas = 0;
2376
2377   switch (insn->getInterpMode()) {
2378   case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
2379   case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2380   case NV50_IR_INTERP_FLAT       : ipam = 2; break;
2381   case NV50_IR_INTERP_SC         : ipam = 3; break;
2382   default:
2383      assert(!"invalid ipa mode");
2384      break;
2385   }
2386
2387   switch (insn->getSampleMode()) {
2388   case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2389   case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2390   case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
2391   default:
2392      assert(!"invalid ipa sample mode");
2393      break;
2394   }
2395
2396   emitInsn (0xe0000000);
2397   emitField(0x36, 2, ipam);
2398   emitField(0x34, 2, ipas);
2399   emitSAT  (0x33);
2400   emitField(0x2f, 3, 7);
2401   emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2402   if ((code[0] & 0x0000ff00) != 0x0000ff00)
2403      code[1] |= 0x00000040; /* .idx */
2404   emitGPR(0x00, insn->def(0));
2405
2406   if (insn->op == OP_PINTERP) {
2407      emitGPR(0x14, insn->src(1));
2408      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2409         emitGPR(0x27, insn->src(2));
2410      addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2411   } else {
2412      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2413         emitGPR(0x27, insn->src(1));
2414      emitGPR(0x14);
2415      addInterp(insn->ipa, 0xff, interpApply);
2416   }
2417
2418   if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2419      emitGPR(0x27);
2420}
2421
2422void
2423CodeEmitterGM107::emitATOM()
2424{
2425   unsigned dType, subOp;
2426
2427   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2428      switch (insn->dType) {
2429      case TYPE_U32: dType = 0; break;
2430      case TYPE_U64: dType = 1; break;
2431      default: assert(!"unexpected dType"); dType = 0; break;
2432      }
2433      subOp = 15;
2434
2435      emitInsn (0xee000000);
2436   } else {
2437      switch (insn->dType) {
2438      case TYPE_U32: dType = 0; break;
2439      case TYPE_S32: dType = 1; break;
2440      case TYPE_U64: dType = 2; break;
2441      case TYPE_F32: dType = 3; break;
2442      case TYPE_B128: dType = 4; break;
2443      case TYPE_S64: dType = 5; break;
2444      default: assert(!"unexpected dType"); dType = 0; break;
2445      }
2446      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2447         subOp = 8;
2448      else
2449         subOp = insn->subOp;
2450
2451      emitInsn (0xed000000);
2452   }
2453
2454   emitField(0x34, 4, subOp);
2455   emitField(0x31, 3, dType);
2456   emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2457   emitGPR  (0x14, insn->src(1));
2458   emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2459   emitGPR  (0x00, insn->def(0));
2460}
2461
2462void
2463CodeEmitterGM107::emitATOMS()
2464{
2465   unsigned dType, subOp;
2466
2467   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2468      switch (insn->dType) {
2469      case TYPE_U32: dType = 0; break;
2470      case TYPE_U64: dType = 1; break;
2471      default: assert(!"unexpected dType"); dType = 0; break;
2472      }
2473      subOp = 4;
2474
2475      emitInsn (0xee000000);
2476      emitField(0x34, 1, dType);
2477   } else {
2478      switch (insn->dType) {
2479      case TYPE_U32: dType = 0; break;
2480      case TYPE_S32: dType = 1; break;
2481      case TYPE_U64: dType = 2; break;
2482      case TYPE_S64: dType = 3; break;
2483      default: assert(!"unexpected dType"); dType = 0; break;
2484      }
2485
2486      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2487         subOp = 8;
2488      else
2489         subOp = insn->subOp;
2490
2491      emitInsn (0xec000000);
2492      emitField(0x1c, 3, dType);
2493   }
2494
2495   emitField(0x34, 4, subOp);
2496   emitGPR  (0x14, insn->src(1));
2497   emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2498   emitGPR  (0x00, insn->def(0));
2499}
2500
2501void
2502CodeEmitterGM107::emitRED()
2503{
2504   unsigned dType;
2505
2506   switch (insn->dType) {
2507   case TYPE_U32: dType = 0; break;
2508   case TYPE_S32: dType = 1; break;
2509   case TYPE_U64: dType = 2; break;
2510   case TYPE_F32: dType = 3; break;
2511   case TYPE_B128: dType = 4; break;
2512   case TYPE_S64: dType = 5; break;
2513   default: assert(!"unexpected dType"); dType = 0; break;
2514   }
2515
2516   emitInsn (0xebf80000);
2517   emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2518   emitField(0x17, 3, insn->subOp);
2519   emitField(0x14, 3, dType);
2520   emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2521   emitGPR  (0x00, insn->src(1));
2522}
2523
2524void
2525CodeEmitterGM107::emitCCTL()
2526{
2527   unsigned width;
2528   if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2529      emitInsn(0xef600000);
2530      width = 30;
2531   } else {
2532      emitInsn(0xef800000);
2533      width = 22;
2534   }
2535   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2536   emitADDR (0x08, 0x16, width, 2, insn->src(0));
2537   emitField(0x00, 4, insn->subOp);
2538}
2539
2540/*******************************************************************************
2541 * surface
2542 ******************************************************************************/
2543
2544void
2545CodeEmitterGM107::emitPIXLD()
2546{
2547   emitInsn (0xefe80000);
2548   emitPRED (0x2d);
2549   emitField(0x1f, 3, insn->subOp);
2550   emitGPR  (0x08, insn->src(0));
2551   emitGPR  (0x00, insn->def(0));
2552}
2553
2554/*******************************************************************************
2555 * texture
2556 ******************************************************************************/
2557
2558void
2559CodeEmitterGM107::emitTEXs(int pos)
2560{
2561   int src1 = insn->predSrc == 1 ? 2 : 1;
2562   if (insn->srcExists(src1))
2563      emitGPR(pos, insn->src(src1));
2564   else
2565      emitGPR(pos);
2566}
2567
2568void
2569CodeEmitterGM107::emitTEX()
2570{
2571   const TexInstruction *insn = this->insn->asTex();
2572   int lodm = 0;
2573
2574   if (!insn->tex.levelZero) {
2575      switch (insn->op) {
2576      case OP_TEX: lodm = 0; break;
2577      case OP_TXB: lodm = 2; break;
2578      case OP_TXL: lodm = 3; break;
2579      default:
2580         assert(!"invalid tex op");
2581         break;
2582      }
2583   } else {
2584      lodm = 1;
2585   }
2586
2587   if (insn->tex.rIndirectSrc >= 0) {
2588      emitInsn (0xdeb80000);
2589      emitField(0x25, 2, lodm);
2590      emitField(0x24, 1, insn->tex.useOffsets == 1);
2591   } else {
2592      emitInsn (0xc0380000);
2593      emitField(0x37, 2, lodm);
2594      emitField(0x36, 1, insn->tex.useOffsets == 1);
2595      emitField(0x24, 13, insn->tex.r);
2596   }
2597
2598   emitField(0x32, 1, insn->tex.target.isShadow());
2599   emitField(0x31, 1, insn->tex.liveOnly);
2600   emitField(0x23, 1, insn->tex.derivAll);
2601   emitField(0x1f, 4, insn->tex.mask);
2602   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2603                      insn->tex.target.getDim() - 1);
2604   emitField(0x1c, 1, insn->tex.target.isArray());
2605   emitTEXs (0x14);
2606   emitGPR  (0x08, insn->src(0));
2607   emitGPR  (0x00, insn->def(0));
2608}
2609
2610void
2611CodeEmitterGM107::emitTLD()
2612{
2613   const TexInstruction *insn = this->insn->asTex();
2614
2615   if (insn->tex.rIndirectSrc >= 0) {
2616      emitInsn (0xdd380000);
2617   } else {
2618      emitInsn (0xdc380000);
2619      emitField(0x24, 13, insn->tex.r);
2620   }
2621
2622   emitField(0x37, 1, insn->tex.levelZero == 0);
2623   emitField(0x32, 1, insn->tex.target.isMS());
2624   emitField(0x31, 1, insn->tex.liveOnly);
2625   emitField(0x23, 1, insn->tex.useOffsets == 1);
2626   emitField(0x1f, 4, insn->tex.mask);
2627   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2628                      insn->tex.target.getDim() - 1);
2629   emitField(0x1c, 1, insn->tex.target.isArray());
2630   emitTEXs (0x14);
2631   emitGPR  (0x08, insn->src(0));
2632   emitGPR  (0x00, insn->def(0));
2633}
2634
2635void
2636CodeEmitterGM107::emitTLD4()
2637{
2638   const TexInstruction *insn = this->insn->asTex();
2639
2640   if (insn->tex.rIndirectSrc >= 0) {
2641      emitInsn (0xdef80000);
2642      emitField(0x26, 2, insn->tex.gatherComp);
2643      emitField(0x25, 2, insn->tex.useOffsets == 4);
2644      emitField(0x24, 2, insn->tex.useOffsets == 1);
2645   } else {
2646      emitInsn (0xc8380000);
2647      emitField(0x38, 2, insn->tex.gatherComp);
2648      emitField(0x37, 2, insn->tex.useOffsets == 4);
2649      emitField(0x36, 2, insn->tex.useOffsets == 1);
2650      emitField(0x24, 13, insn->tex.r);
2651   }
2652
2653   emitField(0x32, 1, insn->tex.target.isShadow());
2654   emitField(0x31, 1, insn->tex.liveOnly);
2655   emitField(0x23, 1, insn->tex.derivAll);
2656   emitField(0x1f, 4, insn->tex.mask);
2657   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2658                      insn->tex.target.getDim() - 1);
2659   emitField(0x1c, 1, insn->tex.target.isArray());
2660   emitTEXs (0x14);
2661   emitGPR  (0x08, insn->src(0));
2662   emitGPR  (0x00, insn->def(0));
2663}
2664
2665void
2666CodeEmitterGM107::emitTXD()
2667{
2668   const TexInstruction *insn = this->insn->asTex();
2669
2670   if (insn->tex.rIndirectSrc >= 0) {
2671      emitInsn (0xde780000);
2672   } else {
2673      emitInsn (0xde380000);
2674      emitField(0x24, 13, insn->tex.r);
2675   }
2676
2677   emitField(0x31, 1, insn->tex.liveOnly);
2678   emitField(0x23, 1, insn->tex.useOffsets == 1);
2679   emitField(0x1f, 4, insn->tex.mask);
2680   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2681                      insn->tex.target.getDim() - 1);
2682   emitField(0x1c, 1, insn->tex.target.isArray());
2683   emitTEXs (0x14);
2684   emitGPR  (0x08, insn->src(0));
2685   emitGPR  (0x00, insn->def(0));
2686}
2687
2688void
2689CodeEmitterGM107::emitTMML()
2690{
2691   const TexInstruction *insn = this->insn->asTex();
2692
2693   if (insn->tex.rIndirectSrc >= 0) {
2694      emitInsn (0xdf600000);
2695   } else {
2696      emitInsn (0xdf580000);
2697      emitField(0x24, 13, insn->tex.r);
2698   }
2699
2700   emitField(0x31, 1, insn->tex.liveOnly);
2701   emitField(0x23, 1, insn->tex.derivAll);
2702   emitField(0x1f, 4, insn->tex.mask);
2703   emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2704                      insn->tex.target.getDim() - 1);
2705   emitField(0x1c, 1, insn->tex.target.isArray());
2706   emitTEXs (0x14);
2707   emitGPR  (0x08, insn->src(0));
2708   emitGPR  (0x00, insn->def(0));
2709}
2710
2711void
2712CodeEmitterGM107::emitTXQ()
2713{
2714   const TexInstruction *insn = this->insn->asTex();
2715   int type = 0;
2716
2717   switch (insn->tex.query) {
2718   case TXQ_DIMS           : type = 0x01; break;
2719   case TXQ_TYPE           : type = 0x02; break;
2720   case TXQ_SAMPLE_POSITION: type = 0x05; break;
2721   case TXQ_FILTER         : type = 0x10; break;
2722   case TXQ_LOD            : type = 0x12; break;
2723   case TXQ_WRAP           : type = 0x14; break;
2724   case TXQ_BORDER_COLOUR  : type = 0x16; break;
2725   default:
2726      assert(!"invalid txq query");
2727      break;
2728   }
2729
2730   if (insn->tex.rIndirectSrc >= 0) {
2731      emitInsn (0xdf500000);
2732   } else {
2733      emitInsn (0xdf480000);
2734      emitField(0x24, 13, insn->tex.r);
2735   }
2736
2737   emitField(0x31, 1, insn->tex.liveOnly);
2738   emitField(0x1f, 4, insn->tex.mask);
2739   emitField(0x16, 6, type);
2740   emitGPR  (0x08, insn->src(0));
2741   emitGPR  (0x00, insn->def(0));
2742}
2743
2744void
2745CodeEmitterGM107::emitDEPBAR()
2746{
2747   emitInsn (0xf0f00000);
2748   emitField(0x1d, 1, 1); /* le */
2749   emitField(0x1a, 3, 5);
2750   emitField(0x14, 6, insn->subOp);
2751   emitField(0x00, 6, insn->subOp);
2752}
2753
2754/*******************************************************************************
2755 * misc
2756 ******************************************************************************/
2757
2758void
2759CodeEmitterGM107::emitNOP()
2760{
2761   emitInsn(0x50b00000);
2762}
2763
2764void
2765CodeEmitterGM107::emitKIL()
2766{
2767   emitInsn (0xe3300000);
2768   emitCond5(0x00, CC_TR);
2769}
2770
2771void
2772CodeEmitterGM107::emitOUT()
2773{
2774   const int cut  = insn->op == OP_RESTART || insn->subOp;
2775   const int emit = insn->op == OP_EMIT;
2776
2777   switch (insn->src(1).getFile()) {
2778   case FILE_GPR:
2779      emitInsn(0xfbe00000);
2780      emitGPR (0x14, insn->src(1));
2781      break;
2782   case FILE_IMMEDIATE:
2783      emitInsn(0xf6e00000);
2784      emitIMMD(0x14, 19, insn->src(1));
2785      break;
2786   case FILE_MEMORY_CONST:
2787      emitInsn(0xebe00000);
2788      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2789      break;
2790   default:
2791      assert(!"bad src1 file");
2792      break;
2793   }
2794
2795   emitField(0x27, 2, (cut << 1) | emit);
2796   emitGPR  (0x08, insn->src(0));
2797   emitGPR  (0x00, insn->def(0));
2798}
2799
2800void
2801CodeEmitterGM107::emitBAR()
2802{
2803   uint8_t subop;
2804
2805   emitInsn (0xf0a80000);
2806
2807   switch (insn->subOp) {
2808   case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2809   case NV50_IR_SUBOP_BAR_RED_AND:  subop = 0x0a; break;
2810   case NV50_IR_SUBOP_BAR_RED_OR:   subop = 0x12; break;
2811   case NV50_IR_SUBOP_BAR_ARRIVE:   subop = 0x81; break;
2812   default:
2813      subop = 0x80;
2814      assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2815      break;
2816   }
2817
2818   emitField(0x20, 8, subop);
2819
2820   // barrier id
2821   if (insn->src(0).getFile() == FILE_GPR) {
2822      emitGPR(0x08, insn->src(0));
2823   } else {
2824      ImmediateValue *imm = insn->getSrc(0)->asImm();
2825      assert(imm);
2826      emitField(0x08, 8, imm->reg.data.u32);
2827      emitField(0x2b, 1, 1);
2828   }
2829
2830   // thread count
2831   if (insn->src(1).getFile() == FILE_GPR) {
2832      emitGPR(0x14, insn->src(1));
2833   } else {
2834      ImmediateValue *imm = insn->getSrc(0)->asImm();
2835      assert(imm);
2836      emitField(0x14, 12, imm->reg.data.u32);
2837      emitField(0x2c, 1, 1);
2838   }
2839
2840   if (insn->srcExists(2) && (insn->predSrc != 2)) {
2841      emitPRED (0x27, insn->src(2));
2842      emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2843   } else {
2844      emitField(0x27, 3, 7);
2845   }
2846}
2847
2848void
2849CodeEmitterGM107::emitMEMBAR()
2850{
2851   emitInsn (0xef980000);
2852   emitField(0x08, 2, insn->subOp >> 2);
2853}
2854
2855void
2856CodeEmitterGM107::emitVOTE()
2857{
2858   assert(insn->src(0).getFile() == FILE_PREDICATE);
2859
2860   int r = -1, p = -1;
2861   for (int i = 0; insn->defExists(i); i++) {
2862      if (insn->def(i).getFile() == FILE_GPR)
2863         r = i;
2864      else if (insn->def(i).getFile() == FILE_PREDICATE)
2865         p = i;
2866   }
2867
2868   emitInsn (0x50d80000);
2869   emitField(0x30, 2, insn->subOp);
2870   if (r >= 0)
2871      emitGPR  (0x00, insn->def(r));
2872   else
2873      emitGPR  (0x00);
2874   if (p >= 0)
2875      emitPRED (0x2d, insn->def(p));
2876   else
2877      emitPRED (0x2d);
2878   emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2879   emitPRED (0x27, insn->src(0));
2880}
2881
2882void
2883CodeEmitterGM107::emitSUTarget()
2884{
2885   const TexInstruction *insn = this->insn->asTex();
2886   int target = 0;
2887
2888   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2889
2890   if (insn->tex.target == TEX_TARGET_BUFFER) {
2891      target = 2;
2892   } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2893      target = 4;
2894   } else if (insn->tex.target == TEX_TARGET_2D ||
2895              insn->tex.target == TEX_TARGET_RECT) {
2896      target = 6;
2897   } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2898              insn->tex.target == TEX_TARGET_CUBE ||
2899              insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
2900      target = 8;
2901   } else if (insn->tex.target == TEX_TARGET_3D) {
2902      target = 10;
2903   } else {
2904      assert(insn->tex.target == TEX_TARGET_1D);
2905   }
2906   emitField(0x20, 4, target);
2907}
2908
2909void
2910CodeEmitterGM107::emitSUHandle(const int s)
2911{
2912   const TexInstruction *insn = this->insn->asTex();
2913
2914   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2915
2916   if (insn->src(s).getFile() == FILE_GPR) {
2917      emitGPR(0x27, insn->src(s));
2918   } else {
2919      ImmediateValue *imm = insn->getSrc(s)->asImm();
2920      assert(imm);
2921      emitField(0x33, 1, 1);
2922      emitField(0x24, 13, imm->reg.data.u32);
2923   }
2924}
2925
2926void
2927CodeEmitterGM107::emitSUSTx()
2928{
2929   const TexInstruction *insn = this->insn->asTex();
2930
2931   emitInsn(0xeb200000);
2932   if (insn->op == OP_SUSTB)
2933      emitField(0x34, 1, 1);
2934   emitSUTarget();
2935
2936   emitLDSTc(0x18);
2937   emitField(0x14, 4, 0xf); // rgba
2938   emitGPR  (0x08, insn->src(0));
2939   emitGPR  (0x00, insn->src(1));
2940
2941   emitSUHandle(2);
2942}
2943
2944void
2945CodeEmitterGM107::emitSULDx()
2946{
2947   const TexInstruction *insn = this->insn->asTex();
2948   int type = 0;
2949
2950   emitInsn(0xeb000000);
2951   if (insn->op == OP_SULDB)
2952      emitField(0x34, 1, 1);
2953   emitSUTarget();
2954
2955   switch (insn->dType) {
2956   case TYPE_S8:   type = 1; break;
2957   case TYPE_U16:  type = 2; break;
2958   case TYPE_S16:  type = 3; break;
2959   case TYPE_U32:  type = 4; break;
2960   case TYPE_U64:  type = 5; break;
2961   case TYPE_B128: type = 6; break;
2962   default:
2963      assert(insn->dType == TYPE_U8);
2964      break;
2965   }
2966   emitLDSTc(0x18);
2967   emitField(0x14, 3, type);
2968   emitGPR  (0x00, insn->def(0));
2969   emitGPR  (0x08, insn->src(0));
2970
2971   emitSUHandle(1);
2972}
2973
2974void
2975CodeEmitterGM107::emitSUREDx()
2976{
2977   const TexInstruction *insn = this->insn->asTex();
2978   uint8_t type = 0, subOp;
2979
2980   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
2981      emitInsn(0xeac00000);
2982   else
2983      emitInsn(0xea600000);
2984
2985   if (insn->op == OP_SUREDB)
2986      emitField(0x34, 1, 1);
2987   emitSUTarget();
2988
2989   // destination type
2990   switch (insn->dType) {
2991   case TYPE_S32: type = 1; break;
2992   case TYPE_U64: type = 2; break;
2993   case TYPE_F32: type = 3; break;
2994   case TYPE_S64: type = 5; break;
2995   default:
2996      assert(insn->dType == TYPE_U32);
2997      break;
2998   }
2999
3000   // atomic operation
3001   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3002      subOp = 0;
3003   } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3004      subOp = 8;
3005   } else {
3006      subOp = insn->subOp;
3007   }
3008
3009   emitField(0x24, 3, type);
3010   emitField(0x1d, 4, subOp);
3011   emitGPR  (0x14, insn->src(1));
3012   emitGPR  (0x08, insn->src(0));
3013   emitGPR  (0x00, insn->def(0));
3014
3015   emitSUHandle(2);
3016}
3017
3018/*******************************************************************************
3019 * assembler front-end
3020 ******************************************************************************/
3021
3022bool
3023CodeEmitterGM107::emitInstruction(Instruction *i)
3024{
3025   const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3026   bool ret = true;
3027
3028   insn = i;
3029
3030   if (insn->encSize != 8) {
3031      ERROR("skipping undecodable instruction: "); insn->print();
3032      return false;
3033   } else
3034   if (codeSize + size > codeSizeLimit) {
3035      ERROR("code emitter output buffer too small\n");
3036      return false;
3037   }
3038
3039   if (writeIssueDelays) {
3040      int n = ((codeSize & 0x1f) / 8) - 1;
3041      if (n < 0) {
3042         data = code;
3043         data[0] = 0x00000000;
3044         data[1] = 0x00000000;
3045         code += 2;
3046         codeSize += 8;
3047         n++;
3048      }
3049
3050      emitField(data, n * 21, 21, insn->sched);
3051   }
3052
3053   switch (insn->op) {
3054   case OP_EXIT:
3055      emitEXIT();
3056      break;
3057   case OP_BRA:
3058      emitBRA();
3059      break;
3060   case OP_CALL:
3061      emitCAL();
3062      break;
3063   case OP_PRECONT:
3064      emitPCNT();
3065      break;
3066   case OP_CONT:
3067      emitCONT();
3068      break;
3069   case OP_PREBREAK:
3070      emitPBK();
3071      break;
3072   case OP_BREAK:
3073      emitBRK();
3074      break;
3075   case OP_PRERET:
3076      emitPRET();
3077      break;
3078   case OP_RET:
3079      emitRET();
3080      break;
3081   case OP_JOINAT:
3082      emitSSY();
3083      break;
3084   case OP_JOIN:
3085      emitSYNC();
3086      break;
3087   case OP_QUADON:
3088      emitSAM();
3089      break;
3090   case OP_QUADPOP:
3091      emitRAM();
3092      break;
3093   case OP_MOV:
3094      emitMOV();
3095      break;
3096   case OP_RDSV:
3097      emitS2R();
3098      break;
3099   case OP_ABS:
3100   case OP_NEG:
3101   case OP_SAT:
3102   case OP_FLOOR:
3103   case OP_CEIL:
3104   case OP_TRUNC:
3105   case OP_CVT:
3106      if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3107                                 insn->src(0).getFile() == FILE_PREDICATE)) {
3108         emitMOV();
3109      } else if (isFloatType(insn->dType)) {
3110         if (isFloatType(insn->sType))
3111            emitF2F();
3112         else
3113            emitI2F();
3114      } else {
3115         if (isFloatType(insn->sType))
3116            emitF2I();
3117         else
3118            emitI2I();
3119      }
3120      break;
3121   case OP_SHFL:
3122      emitSHFL();
3123      break;
3124   case OP_ADD:
3125   case OP_SUB:
3126      if (isFloatType(insn->dType)) {
3127         if (insn->dType == TYPE_F64)
3128            emitDADD();
3129         else
3130            emitFADD();
3131      } else {
3132         emitIADD();
3133      }
3134      break;
3135   case OP_MUL:
3136      if (isFloatType(insn->dType)) {
3137         if (insn->dType == TYPE_F64)
3138            emitDMUL();
3139         else
3140            emitFMUL();
3141      } else {
3142         emitIMUL();
3143      }
3144      break;
3145   case OP_MAD:
3146   case OP_FMA:
3147      if (isFloatType(insn->dType)) {
3148         if (insn->dType == TYPE_F64)
3149            emitDFMA();
3150         else
3151            emitFFMA();
3152      } else {
3153         emitIMAD();
3154      }
3155      break;
3156   case OP_SHLADD:
3157      emitISCADD();
3158      break;
3159   case OP_MIN:
3160   case OP_MAX:
3161      if (isFloatType(insn->dType)) {
3162         if (insn->dType == TYPE_F64)
3163            emitDMNMX();
3164         else
3165            emitFMNMX();
3166      } else {
3167         emitIMNMX();
3168      }
3169      break;
3170   case OP_SHL:
3171      emitSHL();
3172      break;
3173   case OP_SHR:
3174      emitSHR();
3175      break;
3176   case OP_POPCNT:
3177      emitPOPC();
3178      break;
3179   case OP_INSBF:
3180      emitBFI();
3181      break;
3182   case OP_EXTBF:
3183      emitBFE();
3184      break;
3185   case OP_BFIND:
3186      emitFLO();
3187      break;
3188   case OP_SLCT:
3189      if (isFloatType(insn->dType))
3190         emitFCMP();
3191      else
3192         emitICMP();
3193      break;
3194   case OP_SET:
3195   case OP_SET_AND:
3196   case OP_SET_OR:
3197   case OP_SET_XOR:
3198      if (insn->def(0).getFile() != FILE_PREDICATE) {
3199         if (isFloatType(insn->sType))
3200            if (insn->sType == TYPE_F64)
3201               emitDSET();
3202            else
3203               emitFSET();
3204         else
3205            emitISET();
3206      } else {
3207         if (isFloatType(insn->sType))
3208            if (insn->sType == TYPE_F64)
3209               emitDSETP();
3210            else
3211               emitFSETP();
3212         else
3213            emitISETP();
3214      }
3215      break;
3216   case OP_SELP:
3217      emitSEL();
3218      break;
3219   case OP_PRESIN:
3220   case OP_PREEX2:
3221      emitRRO();
3222      break;
3223   case OP_COS:
3224   case OP_SIN:
3225   case OP_EX2:
3226   case OP_LG2:
3227   case OP_RCP:
3228   case OP_RSQ:
3229      emitMUFU();
3230      break;
3231   case OP_AND:
3232   case OP_OR:
3233   case OP_XOR:
3234      emitLOP();
3235      break;
3236   case OP_NOT:
3237      emitNOT();
3238      break;
3239   case OP_LOAD:
3240      switch (insn->src(0).getFile()) {
3241      case FILE_MEMORY_CONST : emitLDC(); break;
3242      case FILE_MEMORY_LOCAL : emitLDL(); break;
3243      case FILE_MEMORY_SHARED: emitLDS(); break;
3244      case FILE_MEMORY_GLOBAL: emitLD(); break;
3245      default:
3246         assert(!"invalid load");
3247         emitNOP();
3248         break;
3249      }
3250      break;
3251   case OP_STORE:
3252      switch (insn->src(0).getFile()) {
3253      case FILE_MEMORY_LOCAL : emitSTL(); break;
3254      case FILE_MEMORY_SHARED: emitSTS(); break;
3255      case FILE_MEMORY_GLOBAL: emitST(); break;
3256      default:
3257         assert(!"invalid store");
3258         emitNOP();
3259         break;
3260      }
3261      break;
3262   case OP_ATOM:
3263      if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3264         emitATOMS();
3265      else
3266         if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3267            emitRED();
3268         else
3269            emitATOM();
3270      break;
3271   case OP_CCTL:
3272      emitCCTL();
3273      break;
3274   case OP_VFETCH:
3275      emitALD();
3276      break;
3277   case OP_EXPORT:
3278      emitAST();
3279      break;
3280   case OP_PFETCH:
3281      emitISBERD();
3282      break;
3283   case OP_AFETCH:
3284      emitAL2P();
3285      break;
3286   case OP_LINTERP:
3287   case OP_PINTERP:
3288      emitIPA();
3289      break;
3290   case OP_PIXLD:
3291      emitPIXLD();
3292      break;
3293   case OP_TEX:
3294   case OP_TXB:
3295   case OP_TXL:
3296      emitTEX();
3297      break;
3298   case OP_TXF:
3299      emitTLD();
3300      break;
3301   case OP_TXG:
3302      emitTLD4();
3303      break;
3304   case OP_TXD:
3305      emitTXD();
3306      break;
3307   case OP_TXQ:
3308      emitTXQ();
3309      break;
3310   case OP_TXLQ:
3311      emitTMML();
3312      break;
3313   case OP_TEXBAR:
3314      emitDEPBAR();
3315      break;
3316   case OP_QUADOP:
3317      emitFSWZADD();
3318      break;
3319   case OP_NOP:
3320      emitNOP();
3321      break;
3322   case OP_DISCARD:
3323      emitKIL();
3324      break;
3325   case OP_EMIT:
3326   case OP_RESTART:
3327      emitOUT();
3328      break;
3329   case OP_BAR:
3330      emitBAR();
3331      break;
3332   case OP_MEMBAR:
3333      emitMEMBAR();
3334      break;
3335   case OP_VOTE:
3336      emitVOTE();
3337      break;
3338   case OP_SUSTB:
3339   case OP_SUSTP:
3340      emitSUSTx();
3341      break;
3342   case OP_SULDB:
3343   case OP_SULDP:
3344      emitSULDx();
3345      break;
3346   case OP_SUREDB:
3347   case OP_SUREDP:
3348      emitSUREDx();
3349      break;
3350   default:
3351      assert(!"invalid opcode");
3352      emitNOP();
3353      ret = false;
3354      break;
3355   }
3356
3357   if (insn->join) {
3358      /*XXX*/
3359   }
3360
3361   code += 2;
3362   codeSize += 8;
3363   return ret;
3364}
3365
3366uint32_t
3367CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3368{
3369   return 8;
3370}
3371
3372/*******************************************************************************
3373 * sched data calculator
3374 ******************************************************************************/
3375
3376class SchedDataCalculatorGM107 : public Pass
3377{
3378public:
3379   SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3380
3381private:
3382   struct RegScores
3383   {
3384      struct ScoreData {
3385         int r[256];
3386         int p[8];
3387         int c;
3388      } rd, wr;
3389      int base;
3390
3391      void rebase(const int base)
3392      {
3393         const int delta = this->base - base;
3394         if (!delta)
3395            return;
3396         this->base = 0;
3397
3398         for (int i = 0; i < 256; ++i) {
3399            rd.r[i] += delta;
3400            wr.r[i] += delta;
3401         }
3402         for (int i = 0; i < 8; ++i) {
3403            rd.p[i] += delta;
3404            wr.p[i] += delta;
3405         }
3406         rd.c += delta;
3407         wr.c += delta;
3408      }
3409      void wipe()
3410      {
3411         memset(&rd, 0, sizeof(rd));
3412         memset(&wr, 0, sizeof(wr));
3413      }
3414      int getLatest(const ScoreData& d) const
3415      {
3416         int max = 0;
3417         for (int i = 0; i < 256; ++i)
3418            if (d.r[i] > max)
3419               max = d.r[i];
3420         for (int i = 0; i < 8; ++i)
3421            if (d.p[i] > max)
3422               max = d.p[i];
3423         if (d.c > max)
3424            max = d.c;
3425         return max;
3426      }
3427      inline int getLatestRd() const
3428      {
3429         return getLatest(rd);
3430      }
3431      inline int getLatestWr() const
3432      {
3433         return getLatest(wr);
3434      }
3435      inline int getLatest() const
3436      {
3437         return MAX2(getLatestRd(), getLatestWr());
3438      }
3439      void setMax(const RegScores *that)
3440      {
3441         for (int i = 0; i < 256; ++i) {
3442            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3443            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3444         }
3445         for (int i = 0; i < 8; ++i) {
3446            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3447            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3448         }
3449         rd.c = MAX2(rd.c, that->rd.c);
3450         wr.c = MAX2(wr.c, that->wr.c);
3451      }
3452      void print(int cycle)
3453      {
3454         for (int i = 0; i < 256; ++i) {
3455            if (rd.r[i] > cycle)
3456               INFO("rd $r%i @ %i\n", i, rd.r[i]);
3457            if (wr.r[i] > cycle)
3458               INFO("wr $r%i @ %i\n", i, wr.r[i]);
3459         }
3460         for (int i = 0; i < 8; ++i) {
3461            if (rd.p[i] > cycle)
3462               INFO("rd $p%i @ %i\n", i, rd.p[i]);
3463            if (wr.p[i] > cycle)
3464               INFO("wr $p%i @ %i\n", i, wr.p[i]);
3465         }
3466         if (rd.c > cycle)
3467            INFO("rd $c @ %i\n", rd.c);
3468         if (wr.c > cycle)
3469            INFO("wr $c @ %i\n", wr.c);
3470      }
3471   };
3472
   RegScores *score; // for current BB
   // One scoreboard per CFG node: sized from func->cfg.getSize() in
   // visit(Function *) and looked up by BasicBlock id in visit(BasicBlock *).
   std::vector<RegScores> scoreBoards;

   const TargetGM107 *targ;
   // Per-function setup (orders instructions, wipes the scoreboards).
   bool visit(Function *);
   // Computes the scheduling word of every instruction in a basic block.
   bool visit(BasicBlock *);

   // Record the results of an instruction issued at the given cycle.
   void commitInsn(const Instruction *, int);
   // Cycles an instruction must still wait for its sources at a given cycle.
   int calcDelay(const Instruction *, int) const;
   // Choose and store the stall count of an instruction (delay, successor).
   void setDelay(Instruction *, int, const Instruction *);
   // Scoreboard update for one written value (value, issue cycle, ready cycle).
   void recordWr(const Value *, int, int);
   // Raise the delay (last argument) if the value is not readable yet.
   void checkRd(const Value *, int, int&) const;

   // Writers for the individual fields of Instruction::sched:
   // bits 0-3 stall count, bit 4 yield, bits 5-7 write barrier id,
   // bits 8-10 read barrier id, bits 11-16 barrier wait mask,
   // bits 17-20 operand reuse flags.
   inline void emitYield(Instruction *);
   inline void emitStall(Instruction *, uint8_t);
   inline void emitReuse(Instruction *, uint8_t);
   inline void emitWrDepBar(Instruction *, uint8_t);
   inline void emitRdDepBar(Instruction *, uint8_t);
   inline void emitWtDepBar(Instruction *, uint8_t);

   // Readers for the same fields.
   inline int getStall(const Instruction *) const;
   inline int getWrDepBar(const Instruction *) const;
   inline int getRdDepBar(const Instruction *) const;
   inline int getWtDepBar(const Instruction *) const;

   // Flag source slots whose GPR is used again by the next instruction.
   void setReuseFlag(Instruction *);

   // Debug helper: log the decoded scheduling fields of an instruction.
   inline void printSchedInfo(int, const Instruction *) const;
3501
3502   struct LiveBarUse {
3503      LiveBarUse(Instruction *insn, Instruction *usei)
3504         : insn(insn), usei(usei) { }
3505      Instruction *insn;
3506      Instruction *usei;
3507   };
3508
3509   struct LiveBarDef {
3510      LiveBarDef(Instruction *insn, Instruction *defi)
3511         : insn(insn), defi(defi) { }
3512      Instruction *insn;
3513      Instruction *defi;
3514   };
3515
   // Allocate read/write dependency barriers and their waits within one BB.
   bool insertBarriers(BasicBlock *);

   // First later instruction reading the given instruction's first def.
   Instruction *findFirstUse(const Instruction *) const;
   // First later instruction overwriting a GPR source of the given one.
   Instruction *findFirstDef(const Instruction *) const;

   // Whether an instruction has to set a read / write dependency barrier.
   bool needRdDepBar(const Instruction *) const;
   bool needWrDepBar(const Instruction *) const;
3523};
3524
3525inline void
3526SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3527{
3528   assert(cnt < 16);
3529   insn->sched |= cnt;
3530}
3531
3532inline void
3533SchedDataCalculatorGM107::emitYield(Instruction *insn)
3534{
3535   insn->sched |= 1 << 4;
3536}
3537
3538inline void
3539SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3540{
3541   assert(id < 6);
3542   if ((insn->sched & 0xe0) == 0xe0)
3543      insn->sched ^= 0xe0;
3544   insn->sched |= id << 5;
3545}
3546
3547inline void
3548SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3549{
3550   assert(id < 6);
3551   if ((insn->sched & 0x700) == 0x700)
3552      insn->sched ^= 0x700;
3553   insn->sched |= id << 8;
3554}
3555
3556inline void
3557SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3558{
3559   assert(id < 6);
3560   insn->sched |= 1 << (11 + id);
3561}
3562
3563inline void
3564SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3565{
3566   assert(id < 4);
3567   insn->sched |= 1 << (17 + id);
3568}
3569
3570inline void
3571SchedDataCalculatorGM107::printSchedInfo(int cycle,
3572                                         const Instruction *insn) const
3573{
3574   uint8_t st, yl, wr, rd, wt, ru;
3575
3576   st = (insn->sched & 0x00000f) >> 0;
3577   yl = (insn->sched & 0x000010) >> 4;
3578   wr = (insn->sched & 0x0000e0) >> 5;
3579   rd = (insn->sched & 0x000700) >> 8;
3580   wt = (insn->sched & 0x01f800) >> 11;
3581   ru = (insn->sched & 0x1e0000) >> 17;
3582
3583   INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3584        cycle, st, yl, wr, rd, wt, ru);
3585}
3586
3587inline int
3588SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3589{
3590   return insn->sched & 0xf;
3591}
3592
3593inline int
3594SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3595{
3596   return (insn->sched & 0x0000e0) >> 5;
3597}
3598
3599inline int
3600SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3601{
3602   return (insn->sched & 0x000700) >> 8;
3603}
3604
3605inline int
3606SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3607{
3608   return (insn->sched & 0x01f800) >> 11;
3609}
3610
3611// Emit the reuse flag which allows to make use of the new memory hierarchy
3612// introduced since Maxwell, the operand reuse cache.
3613//
3614// It allows to reduce bank conflicts by caching operands. Each time you issue
3615// an instruction, that flag can tell the hw which operands are going to be
3616// re-used by the next instruction. Note that the next instruction has to use
3617// the same GPR id in the same operand slot.
3618void
3619SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3620{
3621   Instruction *next = insn->next;
3622   BitSet defs(255, 1);
3623
3624   if (!targ->isReuseSupported(insn))
3625      return;
3626
3627   for (int d = 0; insn->defExists(d); ++d) {
3628      const Value *def = insn->def(d).rep();
3629      if (insn->def(d).getFile() != FILE_GPR)
3630         continue;
3631      if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3632         continue;
3633      defs.set(def->reg.data.id);
3634   }
3635
3636   for (int s = 0; insn->srcExists(s); s++) {
3637      const Value *src = insn->src(s).rep();
3638      if (insn->src(s).getFile() != FILE_GPR)
3639         continue;
3640      if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3641         continue;
3642      if (defs.test(src->reg.data.id))
3643         continue;
3644      if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3645         continue;
3646      if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3647         continue;
3648      assert(s < 4);
3649      emitReuse(insn, s);
3650   }
3651}
3652
3653void
3654SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3655{
3656   int a = v->reg.data.id, b;
3657
3658   switch (v->reg.file) {
3659   case FILE_GPR:
3660      b = a + v->reg.size / 4;
3661      for (int r = a; r < b; ++r)
3662         score->rd.r[r] = ready;
3663      break;
3664   case FILE_PREDICATE:
3665      // To immediately use a predicate set by any instructions, the minimum
3666      // number of stall counts is 13.
3667      score->rd.p[a] = cycle + 13;
3668      break;
3669   case FILE_FLAGS:
3670      score->rd.c = ready;
3671      break;
3672   default:
3673      break;
3674   }
3675}
3676
3677void
3678SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3679{
3680   int a = v->reg.data.id, b;
3681   int ready = cycle;
3682
3683   switch (v->reg.file) {
3684   case FILE_GPR:
3685      b = a + v->reg.size / 4;
3686      for (int r = a; r < b; ++r)
3687         ready = MAX2(ready, score->rd.r[r]);
3688      break;
3689   case FILE_PREDICATE:
3690      ready = MAX2(ready, score->rd.p[a]);
3691      break;
3692   case FILE_FLAGS:
3693      ready = MAX2(ready, score->rd.c);
3694      break;
3695   default:
3696      break;
3697   }
3698   if (cycle < ready)
3699      delay = MAX2(delay, ready - cycle);
3700}
3701
3702void
3703SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3704{
3705   const int ready = cycle + targ->getLatency(insn);
3706
3707   for (int d = 0; insn->defExists(d); ++d)
3708      recordWr(insn->getDef(d), cycle, ready);
3709
3710#ifdef GM107_DEBUG_SCHED_DATA
3711   score->print(cycle);
3712#endif
3713}
3714
// Bounds of the stall count setDelay() stores for an instruction that is not
// dual-issued; the maximum is the largest value emitStall() accepts.
#define GM107_MIN_ISSUE_DELAY 0x1
#define GM107_MAX_ISSUE_DELAY 0xf
3717
3718int
3719SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3720{
3721   int delay = 0, ready = cycle;
3722
3723   for (int s = 0; insn->srcExists(s); ++s)
3724      checkRd(insn->getSrc(s), cycle, delay);
3725
3726   // TODO: make use of getReadLatency()!
3727
3728   return MAX2(delay, ready - cycle);
3729}
3730
3731void
3732SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3733                                   const Instruction *next)
3734{
3735   const OpClass cl = targ->getOpClass(insn->op);
3736   int wr, rd;
3737
3738   if (insn->op == OP_EXIT ||
3739       insn->op == OP_BAR ||
3740       insn->op == OP_MEMBAR) {
3741      delay = GM107_MAX_ISSUE_DELAY;
3742   } else
3743   if (insn->op == OP_QUADON ||
3744       insn->op == OP_QUADPOP) {
3745      delay = 0xd;
3746   } else
3747   if (cl == OPCLASS_FLOW || insn->join) {
3748      delay = 0xd;
3749   }
3750
3751   if (!next || !targ->canDualIssue(insn, next)) {
3752      delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3753   } else {
3754      delay = 0x0; // dual-issue
3755   }
3756
3757   wr = getWrDepBar(insn);
3758   rd = getRdDepBar(insn);
3759
3760   if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3761      // Barriers take one additional clock cycle to become active on top of
3762      // the clock consumed by the instruction producing it.
3763      if (!next || insn->bb != next->bb) {
3764         delay = 0x2;
3765      } else {
3766         int wt = getWtDepBar(next);
3767         if ((wt & (1 << wr)) | (wt & (1 << rd)))
3768            delay = 0x2;
3769      }
3770   }
3771
3772   emitStall(insn, delay);
3773}
3774
3775
3776// Return true when the given instruction needs to emit a read dependency
3777// barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3778// setting the maximum number of stall counts is not enough.
3779bool
3780SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3781{
3782   BitSet srcs(255, 1), defs(255, 1);
3783   int a, b;
3784
3785   if (!targ->isBarrierRequired(insn))
3786      return false;
3787
3788   // Do not emit a read dependency barrier when the instruction doesn't use
3789   // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3790   for (int s = 0; insn->srcExists(s); ++s) {
3791      const Value *src = insn->src(s).rep();
3792      if (insn->src(s).getFile() != FILE_GPR)
3793         continue;
3794      if (src->reg.data.id == 255)
3795         continue;
3796
3797      a = src->reg.data.id;
3798      b = a + src->reg.size / 4;
3799      for (int r = a; r < b; ++r)
3800         srcs.set(r);
3801   }
3802
3803   if (!srcs.popCount())
3804      return false;
3805
3806   // Do not emit a read dependency barrier when the output GPRs are equal to
3807   // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3808   // be produced and WaR hazards are prevented.
3809   for (int d = 0; insn->defExists(d); ++d) {
3810      const Value *def = insn->def(d).rep();
3811      if (insn->def(d).getFile() != FILE_GPR)
3812         continue;
3813      if (def->reg.data.id == 255)
3814         continue;
3815
3816      a = def->reg.data.id;
3817      b = a + def->reg.size / 4;
3818      for (int r = a; r < b; ++r)
3819         defs.set(r);
3820   }
3821
3822   srcs.andNot(defs);
3823   if (!srcs.popCount())
3824      return false;
3825
3826   return true;
3827}
3828
3829// Return true when the given instruction needs to emit a write dependency
3830// barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3831// setting the maximum number of stall counts is not enough. This is only legal
3832// if the instruction output something.
3833bool
3834SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3835{
3836   if (!targ->isBarrierRequired(insn))
3837      return false;
3838
3839   for (int d = 0; insn->defExists(d); ++d) {
3840      if (insn->def(d).getFile() == FILE_GPR ||
3841          insn->def(d).getFile() == FILE_PREDICATE)
3842         return true;
3843   }
3844   return false;
3845}
3846
3847// Find the next instruction inside the same basic block which uses the output
3848// of the given instruction in order to avoid RaW hazards.
3849Instruction *
3850SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3851{
3852   Instruction *insn, *next;
3853   int minGPR, maxGPR;
3854
3855   if (!bari->defExists(0))
3856      return NULL;
3857
3858   minGPR = bari->def(0).rep()->reg.data.id;
3859   maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3860
3861   for (insn = bari->next; insn != NULL; insn = next) {
3862      next = insn->next;
3863
3864      for (int s = 0; insn->srcExists(s); ++s) {
3865         const Value *src = insn->src(s).rep();
3866         if (bari->def(0).getFile() == FILE_GPR) {
3867            if (insn->src(s).getFile() != FILE_GPR ||
3868                src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3869                src->reg.data.id > maxGPR)
3870               continue;
3871            return insn;
3872         } else
3873         if (bari->def(0).getFile() == FILE_PREDICATE) {
3874            if (insn->src(s).getFile() != FILE_PREDICATE ||
3875                src->reg.data.id != minGPR)
3876               continue;
3877            return insn;
3878         }
3879      }
3880   }
3881   return NULL;
3882}
3883
3884// Find the next instruction inside the same basic block which overwrites, at
3885// least, one source of the given instruction in order to avoid WaR hazards.
3886Instruction *
3887SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3888{
3889   Instruction *insn, *next;
3890   int minGPR, maxGPR;
3891
3892   for (insn = bari->next; insn != NULL; insn = next) {
3893      next = insn->next;
3894
3895      for (int d = 0; insn->defExists(d); ++d) {
3896         const Value *def = insn->def(d).rep();
3897         if (insn->def(d).getFile() != FILE_GPR)
3898            continue;
3899
3900         minGPR = def->reg.data.id;
3901         maxGPR = minGPR + def->reg.size / 4 - 1;
3902
3903         for (int s = 0; bari->srcExists(s); ++s) {
3904            const Value *src = bari->src(s).rep();
3905            if (bari->src(s).getFile() != FILE_GPR ||
3906                src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3907                src->reg.data.id > maxGPR)
3908               continue;
3909            return insn;
3910         }
3911      }
3912   }
3913   return NULL;
3914}
3915
// Dependency barriers:
// This pass is a bit ugly and could probably be improved by performing a
// better allocation.
//
// The main idea is to avoid WaR and RaW hazards by emitting read/write
// dependency barriers using the control codes.
//
// The pass works on a single basic block in two sweeps:
//  1. walk the instructions, make each one wait on the barriers whose first
//     consumer it is (or has passed), and allocate a barrier id for every
//     instruction that needs a write and/or read dependency barrier;
//  2. walk the instructions again and drop waits on barriers that are not
//     pending at that point.
// Always returns true.
bool
SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
{
   std::list<LiveBarUse> live_uses; // pending write barriers and their first use
   std::list<LiveBarDef> live_defs; // pending read barriers and their first def
   Instruction *insn, *next;
   BitSet bars(6, 1); // barrier ids currently allocated
   int bar_id;

   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      Instruction *usei = NULL, *defi = NULL;
      bool need_wr_bar, need_rd_bar;

      next = insn->next;

      // Expire old barrier uses.
      // The current instruction is at or past the first reader, so it has to
      // wait on the barrier; the id becomes available again afterwards.
      for (std::list<LiveBarUse>::iterator it = live_uses.begin();
           it != live_uses.end();) {
         if (insn->serial >= it->usei->serial) {
            int wr = getWrDepBar(it->insn);
            emitWtDepBar(insn, wr);
            bars.clr(wr); // free barrier
            it = live_uses.erase(it);
            continue;
         }
         ++it;
      }

      // Expire old barrier defs.
      // Same as above, for instructions overwriting a source that is still
      // protected by a read barrier.
      for (std::list<LiveBarDef>::iterator it = live_defs.begin();
           it != live_defs.end();) {
         if (insn->serial >= it->defi->serial) {
            int rd = getRdDepBar(it->insn);
            emitWtDepBar(insn, rd);
            bars.clr(rd); // free barrier
            it = live_defs.erase(it);
            continue;
         }
         ++it;
      }

      need_wr_bar = needWrDepBar(insn);
      need_rd_bar = needRdDepBar(insn);

      if (need_wr_bar) {
         // When the instruction requires to emit a write dependency barrier
         // (all which write something at a variable latency), find the next
         // instruction which reads the outputs.
         usei = findFirstUse(insn);

         // Allocate and emit a new barrier.
         // When no id is free, id 5 is used as a fallback.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5;
         bars.set(bar_id);
         emitWrDepBar(insn, bar_id);
         // A barrier with no reader found is not tracked, so its id is not
         // freed again by this loop.
         if (usei)
            live_uses.push_back(LiveBarUse(insn, usei));
      }

      if (need_rd_bar) {
         // When the instruction requires to emit a read dependency barrier
         // (all which read something at a variable latency), find the next
         // instruction which will write the inputs.
         defi = findFirstDef(insn);

         // The write barrier allocated above is waited on no later than the
         // first overwrite of a source, so no separate read barrier is set.
         if (usei && defi && usei->serial <= defi->serial)
            continue;

         // Allocate and emit a new barrier.
         // When no id is free, id 5 is used as a fallback.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5;
         bars.set(bar_id);
         emitRdDepBar(insn, bar_id);
         // As for write barriers, untracked ids are not freed by this loop.
         if (defi)
            live_defs.push_back(LiveBarDef(insn, defi));
      }
   }

   // Remove unnecessary barrier waits.
   // alive_bars holds the ids set by an earlier instruction of this block
   // that nothing has waited on yet.
   BitSet alive_bars(6, 1);
   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      int wr, rd, wt;

      next = insn->next;

      wr = getWrDepBar(insn);
      rd = getRdDepBar(insn);
      wt = getWtDepBar(insn);

      for (int idx = 0; idx < 6; ++idx) {
         if (!(wt & (1 << idx)))
            continue;
         if (!alive_bars.test(idx)) {
            // Nothing pending on this barrier: clear its wait bit.
            insn->sched &= ~(1 << (11  + idx));
         } else {
            // This wait consumes the barrier.
            alive_bars.clr(idx);
         }
      }

      // Field values of 6 and above are not barrier ids (the emitters only
      // accept ids below 6), so they are not recorded.
      if (wr < 6)
         alive_bars.set(wr);
      if (rd < 6)
         alive_bars.set(rd);
   }

   return true;
}
4031
4032bool
4033SchedDataCalculatorGM107::visit(Function *func)
4034{
4035   ArrayList insns;
4036
4037   func->orderInstructions(insns);
4038
4039   scoreBoards.resize(func->cfg.getSize());
4040   for (size_t i = 0; i < scoreBoards.size(); ++i)
4041      scoreBoards[i].wipe();
4042   return true;
4043}
4044
// Compute the scheduling word of every instruction in 'bb': dependency
// barriers first, then stall counts and reuse flags based on a scoreboard
// that is seeded from the predecessors of the block. Always returns true.
bool
SchedDataCalculatorGM107::visit(BasicBlock *bb)
{
   Instruction *insn, *next = NULL;
   int cycle = 0;

   // Start from a scheduling word with bits 5-10 set, i.e. both barrier
   // fields hold 7, and everything else cleared.
   for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
      /*XXX*/
      insn->sched = 0x7e0;
   }

   // Scheduling can be switched off through the NV50_PROG_SCHED option; the
   // default words set above are kept in that case.
   if (!debug_get_bool_option("NV50_PROG_SCHED", true))
      return true;

   // Insert read/write dependency barriers for instructions which don't
   // operate at a fixed latency.
   insertBarriers(bb);

   score = &scoreBoards.at(bb->getId());

   // Seed the scoreboard with the latest stamps of all forward predecessors.
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      // back branches will wait until all target dependencies are satisfied
      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
         continue;
      BasicBlock *in = BasicBlock::get(ei.getNode());
      score->setMax(&scoreBoards.at(in->getId()));
   }

#ifdef GM107_DEBUG_SCHED_DATA
   INFO("=== BB:%i initial scores\n", bb->getId());
   score->print(cycle);
#endif

   // Because barriers are allocated locally (intra-BB), we have to make sure
   // that all produced barriers have been consumed before entering inside a
   // new basic block. The best way is to do a global allocation pre RA but
   // it's really more difficult, especially because of the phi nodes. Anyways,
   // it seems like that waiting on a barrier which has already been consumed
   // doesn't add any additional cost, it's just not elegant!
   Instruction *start = bb->getEntry();
   if (start && bb->cfg.incidentCount() > 0) {
      for (int b = 0; b < 6; b++)
         emitWtDepBar(start, b);
   }

   // Schedule every instruction that has a successor inside this block; the
   // last instruction is handled separately below because its successor
   // lives in another block.
   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
      next = insn->next;

      commitInsn(insn, cycle);
      int delay = calcDelay(next, cycle);
      setDelay(insn, delay, next);
      cycle += getStall(insn);

      setReuseFlag(insn);

      // XXX: The yield flag seems to destroy a bunch of things when it is
      // set on every instruction, need investigation.
      //emitYield(insn);

#ifdef GM107_DEBUG_SCHED_DATA
      printSchedInfo(cycle, insn);
      insn->print();
      next->print();
#endif
   }

   // insn is NULL only when the block has no instructions at all.
   if (!insn)
      return true;
   commitInsn(insn, cycle);

   // Stays negative when there is no outgoing edge; setDelay() then clamps
   // it to the minimum issue delay because next is NULL in that case.
   int bbDelay = -1;

#ifdef GM107_DEBUG_SCHED_DATA
   fprintf(stderr, "last instruction is : ");
   insn->print();
   fprintf(stderr, "cycle=%d\n", cycle);
#endif

   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
      BasicBlock *out = BasicBlock::get(ei.getNode());

      if (ei.getType() != Graph::Edge::BACK) {
         // Only test the first instruction of the outgoing block.
         next = out->getEntry();
         if (next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
         } else {
            // When the outgoing BB is empty, make sure to set the number of
            // stall counts needed by the instruction because we don't know the
            // next instruction.
            bbDelay = MAX2(bbDelay, targ->getLatency(insn));
         }
      } else {
         // Wait until all dependencies are satisfied.
         // Walk the target block, advancing a local cycle counter by the
         // stall counts already stored there, until every pending stamp of
         // this block has been passed.
         const int regsFree = score->getLatest();
         next = out->getFirst();
         for (int c = cycle; next && c < regsFree; next = next->next) {
            bbDelay = MAX2(bbDelay, calcDelay(next, c));
            c += getStall(next);
         }
         next = NULL;
      }
   }
   // The successor instruction is only passed on to setDelay() when there is
   // exactly one outgoing edge; for a back edge next is already NULL.
   if (bb->cfg.outgoingCount() != 1)
      next = NULL;
   setDelay(insn, bbDelay, next);
   cycle += getStall(insn);

   score->rebase(cycle); // common base for initializing out blocks' scores
   return true;
}
4156
4157/*******************************************************************************
4158 * main
4159 ******************************************************************************/
4160
4161void
4162CodeEmitterGM107::prepareEmission(Function *func)
4163{
4164   SchedDataCalculatorGM107 sched(targGM107);
4165   CodeEmitter::prepareEmission(func);
4166   sched.run(func, true, true);
4167}
4168
4169static inline uint32_t sizeToBundlesGM107(uint32_t size)
4170{
4171   return (size + 23) / 24;
4172}
4173
4174void
4175CodeEmitterGM107::prepareEmission(Program *prog)
4176{
4177   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4178        !fi.end(); fi.next()) {
4179      Function *func = reinterpret_cast<Function *>(fi.get());
4180      func->binPos = prog->binSize;
4181      prepareEmission(func);
4182
4183      // adjust sizes & positions for schedulding info:
4184      if (prog->getTarget()->hasSWSched) {
4185         uint32_t adjPos = func->binPos;
4186         BasicBlock *bb = NULL;
4187         for (int i = 0; i < func->bbCount; ++i) {
4188            bb = func->bbArray[i];
4189            int32_t adjSize = bb->binSize;
4190            if (adjPos % 32) {
4191               adjSize -= 32 - adjPos % 32;
4192               if (adjSize < 0)
4193                  adjSize = 0;
4194            }
4195            adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4196            bb->binPos = adjPos;
4197            bb->binSize = adjSize;
4198            adjPos += adjSize;
4199         }
4200         if (bb)
4201            func->binSize = adjPos - func->binPos;
4202      }
4203
4204      prog->binSize += func->binSize;
4205   }
4206}
4207
4208CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4209   : CodeEmitter(target),
4210     targGM107(target),
4211     writeIssueDelays(target->hasSWSched)
4212{
4213   code = NULL;
4214   codeSize = codeSizeLimit = 0;
4215   relocInfo = NULL;
4216}
4217
4218CodeEmitter *
4219TargetGM107::createCodeEmitterGM107(Program::Type type)
4220{
4221   CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4222   emit->setProgramType(type);
4223   return emit;
4224}
4225
4226} // namespace nv50_ir
4227