nv50_ir_emit_nv50.cpp revision f19b7eceb0a4465a729031a5493154e9ab45203a
1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "nv50_ir.h"
24#include "nv50_ir_target_nv50.h"
25
26namespace nv50_ir {
27
// Encoding-form selectors handed to setSrcFileBits() by the emitForm_*()
// helpers: emitForm_MAD uses LONG, emitForm_MUL uses SHORT, emitForm_IMM
// uses IMM and emitForm_ADD uses LONG_ALT.
#define NV50_OP_ENC_LONG     0
#define NV50_OP_ENC_SHORT    1
#define NV50_OP_ENC_IMM      2
#define NV50_OP_ENC_LONG_ALT 3
32
33class CodeEmitterNV50 : public CodeEmitter
34{
35public:
36   CodeEmitterNV50(const TargetNV50 *);
37
38   virtual bool emitInstruction(Instruction *);
39
40   virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42   inline void setProgramType(Program::Type pType) { progType = pType; }
43
44   virtual void prepareEmission(Function *);
45
46private:
47   Program::Type progType;
48
49   const TargetNV50 *targ;
50
51private:
52   inline void defId(const ValueDef&, const int pos);
53   inline void srcId(const ValueRef&, const int pos);
54   inline void srcId(const ValueRef *, const int pos);
55
56   inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57   inline void srcAddr8(const ValueRef&, const int pos);
58
59   void emitFlagsRd(const Instruction *);
60   void emitFlagsWr(const Instruction *);
61
62   void emitCondCode(CondCode cc, DataType ty, int pos);
63
64   inline void setARegBits(unsigned int);
65
66   void setAReg16(const Instruction *, int s);
67   void setImmediate(const Instruction *, int s);
68
69   void setDst(const Value *);
70   void setDst(const Instruction *, int d);
71   void setSrcFileBits(const Instruction *, int enc);
72   void setSrc(const Instruction *, unsigned int s, int slot);
73
74   void emitForm_MAD(const Instruction *);
75   void emitForm_ADD(const Instruction *);
76   void emitForm_MUL(const Instruction *);
77   void emitForm_IMM(const Instruction *);
78
79   void emitLoadStoreSizeLG(DataType ty, int pos);
80   void emitLoadStoreSizeCS(DataType ty);
81
82   void roundMode_MAD(const Instruction *);
83   void roundMode_CVT(RoundMode);
84
85   void emitMNeg12(const Instruction *);
86
87   void emitLOAD(const Instruction *);
88   void emitSTORE(const Instruction *);
89   void emitMOV(const Instruction *);
90   void emitNOP();
91   void emitINTERP(const Instruction *);
92   void emitPFETCH(const Instruction *);
93   void emitOUT(const Instruction *);
94
95   void emitUADD(const Instruction *);
96   void emitAADD(const Instruction *);
97   void emitFADD(const Instruction *);
98   void emitIMUL(const Instruction *);
99   void emitFMUL(const Instruction *);
100   void emitFMAD(const Instruction *);
101   void emitIMAD(const Instruction *);
102   void emitISAD(const Instruction *);
103
104   void emitMINMAX(const Instruction *);
105
106   void emitPreOp(const Instruction *);
107   void emitSFnOp(const Instruction *, uint8_t subOp);
108
109   void emitShift(const Instruction *);
110   void emitARL(const Instruction *, unsigned int shl);
111   void emitLogicOp(const Instruction *);
112   void emitNOT(const Instruction *);
113
114   void emitCVT(const Instruction *);
115   void emitSET(const Instruction *);
116
117   void emitTEX(const TexInstruction *);
118   void emitTXQ(const TexInstruction *);
119
120   void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
121
122   void emitFlow(const Instruction *, uint8_t flowOp);
123   void emitPRERETEmu(const FlowInstruction *);
124};
125
// Shorthands for the reg.data member of the value returned by rep() on a
// source reference (SDATA) or a definition (DDATA); callers read .id or
// .offset from the result.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
128
129void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
130{
131   assert(src.get());
132   code[pos / 32] |= SDATA(src).id << (pos % 32);
133}
134
135void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
136{
137   assert(src->get());
138   code[pos / 32] |= SDATA(*src).id << (pos % 32);
139}
140
141void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
142{
143   assert(src.get());
144
145   int32_t offset = SDATA(src).offset;
146
147   assert(!adj || src.get()->reg.size <= 4);
148   if (adj)
149      offset /= src.get()->reg.size;
150
151   assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
152
153   if (offset < 0)
154      offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
155
156   code[pos / 32] |= offset << (pos % 32);
157}
158
159void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
160{
161   assert(src.get());
162
163   uint32_t offset = SDATA(src).offset;
164
165   assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
166
167   code[pos / 32] |= (offset >> 2) << (pos % 32);
168}
169
170void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
171{
172   assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
173
174   code[pos / 32] |= DDATA(def).id << (pos % 32);
175}
176
177void
178CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
179{
180   switch (insn->rnd) {
181   case ROUND_M: code[1] |= 1 << 22; break;
182   case ROUND_P: code[1] |= 2 << 22; break;
183   case ROUND_Z: code[1] |= 3 << 22; break;
184   default:
185      assert(insn->rnd == ROUND_N);
186      break;
187   }
188}
189
190void
191CodeEmitterNV50::emitMNeg12(const Instruction *i)
192{
193   code[1] |= i->src(0).mod.neg() << 26;
194   code[1] |= i->src(1).mod.neg() << 27;
195}
196
197void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
198{
199   uint8_t enc;
200
201   assert(pos >= 32 || pos <= 27);
202
203   switch (cc) {
204   case CC_LT:  enc = 0x1; break;
205   case CC_LTU: enc = 0x9; break;
206   case CC_EQ:  enc = 0x2; break;
207   case CC_EQU: enc = 0xa; break;
208   case CC_LE:  enc = 0x3; break;
209   case CC_LEU: enc = 0xb; break;
210   case CC_GT:  enc = 0x4; break;
211   case CC_GTU: enc = 0xc; break;
212   case CC_NE:  enc = 0x5; break;
213   case CC_NEU: enc = 0xd; break;
214   case CC_GE:  enc = 0x6; break;
215   case CC_GEU: enc = 0xe; break;
216   case CC_TR:  enc = 0xf; break;
217   case CC_FL:  enc = 0x0; break;
218
219   case CC_O:  enc = 0x10; break;
220   case CC_C:  enc = 0x11; break;
221   case CC_A:  enc = 0x12; break;
222   case CC_S:  enc = 0x13; break;
223   case CC_NS: enc = 0x1c; break;
224   case CC_NA: enc = 0x1d; break;
225   case CC_NC: enc = 0x1e; break;
226   case CC_NO: enc = 0x1f; break;
227
228   default:
229      enc = 0;
230      assert(!"invalid condition code");
231      break;
232   }
233   if (ty != TYPE_NONE && !isFloatType(ty))
234      enc &= ~0x8; // unordered only exists for float types
235
236   code[pos / 32] |= enc << (pos % 32);
237}
238
239void
240CodeEmitterNV50::emitFlagsRd(const Instruction *i)
241{
242   int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
243
244   assert(!(code[1] & 0x00003f80));
245
246   if (s >= 0) {
247      assert(i->getSrc(s)->reg.file == FILE_FLAGS);
248      emitCondCode(i->cc, TYPE_NONE, 32 + 7);
249      srcId(i->src(s), 32 + 12);
250   } else {
251      code[1] |= 0x0780;
252   }
253}
254
255void
256CodeEmitterNV50::emitFlagsWr(const Instruction *i)
257{
258   assert(!(code[1] & 0x70));
259
260   int flagsDef = i->flagsDef;
261
262   // find flags definition and check that it is the last def
263   if (flagsDef < 0) {
264      for (int d = 0; i->defExists(d); ++d)
265         if (i->def(d).getFile() == FILE_FLAGS)
266            flagsDef = d;
267      if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
268         WARN("Instruction::flagsDef was not set properly\n");
269   }
270   if (flagsDef == 0 && i->defExists(1))
271      WARN("flags def should not be the primary definition\n");
272
273   if (flagsDef >= 0)
274      code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
275
276}
277
278void
279CodeEmitterNV50::setARegBits(unsigned int u)
280{
281   code[0] |= (u & 3) << 26;
282   code[1] |= (u & 4);
283}
284
285void
286CodeEmitterNV50::setAReg16(const Instruction *i, int s)
287{
288   if (i->srcExists(s)) {
289      s = i->src(s).indirect[0];
290      if (s >= 0)
291         setARegBits(SDATA(i->src(s)).id + 1);
292   }
293}
294
295void
296CodeEmitterNV50::setImmediate(const Instruction *i, int s)
297{
298   const ImmediateValue *imm = i->src(s).get()->asImm();
299   assert(imm);
300
301   uint32_t u = imm->reg.data.u32;
302
303   if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
304      u = ~u;
305
306   code[1] |= 3;
307   code[0] |= (u & 0x3f) << 16;
308   code[1] |= (u >> 6) << 2;
309}
310
311void
312CodeEmitterNV50::setDst(const Value *dst)
313{
314   const Storage *reg = &dst->join->reg;
315
316   assert(reg->file != FILE_ADDRESS);
317
318   if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
319      code[0] |= (127 << 2) | 1;
320      code[1] |= 8;
321   } else {
322      int id;
323      if (reg->file == FILE_SHADER_OUTPUT) {
324         code[1] |= 8;
325         id = reg->data.offset / 4;
326      } else {
327         id = reg->data.id;
328      }
329      code[0] |= id << 2;
330   }
331}
332
333void
334CodeEmitterNV50::setDst(const Instruction *i, int d)
335{
336   if (i->defExists(d)) {
337      setDst(i->getDef(d));
338   } else
339   if (!d) {
340      code[0] |= 0x01fc; // bit bucket
341      code[1] |= 0x0008;
342   }
343}
344
345// 3 * 2 bits:
346// 0: r
347// 1: a/s
348// 2: c
349// 3: i
350void
351CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
352{
353   uint8_t mode = 0;
354
355   for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
356      switch (i->src(s).getFile()) {
357      case FILE_GPR:
358         break;
359      case FILE_MEMORY_SHARED:
360      case FILE_SHADER_INPUT:
361         mode |= 1 << (s * 2);
362         break;
363      case FILE_MEMORY_CONST:
364         mode |= 2 << (s * 2);
365         break;
366      case FILE_IMMEDIATE:
367         mode |= 3 << (s * 2);
368         break;
369      default:
370	      ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
371         assert(0);
372         break;
373      }
374   }
375   switch (mode) {
376   case 0x00: // rrr
377      break;
378   case 0x01: // arr/grr
379      if (progType == Program::TYPE_GEOMETRY) {
380         code[0] |= 0x01800000;
381         if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
382            code[1] |= 0x00200000;
383      } else {
384         if (enc == NV50_OP_ENC_SHORT)
385            code[0] |= 0x01000000;
386         else
387            code[1] |= 0x00200000;
388      }
389      break;
390   case 0x03: // irr
391      assert(i->op == OP_MOV);
392      return;
393   case 0x0c: // rir
394      break;
395   case 0x0d: // gir
396      code[0] |= 0x01000000;
397      assert(progType == Program::TYPE_GEOMETRY ||
398             progType == Program::TYPE_COMPUTE);
399      break;
400   case 0x08: // rcr
401      code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
402      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
403      break;
404   case 0x09: // acr/gcr
405      if (progType == Program::TYPE_GEOMETRY) {
406         code[0] |= 0x01800000;
407      } else {
408         code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
409         code[1] |= 0x00200000;
410      }
411      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
412      break;
413   case 0x20: // rrc
414      code[0] |= 0x01000000;
415      code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
416      break;
417   case 0x21: // arc
418      code[0] |= 0x01000000;
419      code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
420      assert(progType != Program::TYPE_GEOMETRY);
421      break;
422   default:
423      ERROR("not encodable: %x\n", mode);
424      assert(0);
425      break;
426   }
427   if (progType != Program::TYPE_COMPUTE)
428      return;
429
430   if ((mode & 3) == 1) {
431      const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
432
433      switch (i->getSrc(0)->reg.type) {
434      case TYPE_U8:
435         break;
436      case TYPE_U16:
437         code[0] |= 1 << pos;
438         break;
439      case TYPE_S16:
440         code[0] |= 2 << pos;
441         break;
442      default:
443         code[0] |= 3 << pos;
444         assert(i->getSrc(0)->reg.size == 4);
445         break;
446      }
447   }
448}
449
450void
451CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
452{
453   if (Target::operationSrcNr[i->op] <= s)
454      return;
455   const Storage *reg = &i->src(s).rep()->reg;
456
457   unsigned int id = (reg->file == FILE_GPR) ?
458      reg->data.id :
459      reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
460
461   switch (slot) {
462   case 0: code[0] |= id << 9; break;
463   case 1: code[0] |= id << 16; break;
464   case 2: code[1] |= id << 14; break;
465   default:
466      assert(0);
467      break;
468   }
469}
470
471// the default form:
472//  - long instruction
473//  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
474//  - address & flags
475void
476CodeEmitterNV50::emitForm_MAD(const Instruction *i)
477{
478   assert(i->encSize == 8);
479   code[0] |= 1;
480
481   emitFlagsRd(i);
482   emitFlagsWr(i);
483
484   setDst(i, 0);
485
486   setSrcFileBits(i, NV50_OP_ENC_LONG);
487   setSrc(i, 0, 0);
488   setSrc(i, 1, 1);
489   setSrc(i, 2, 2);
490
491   setAReg16(i, 1);
492}
493
494// like default form, but 2nd source in slot 2, and no 3rd source
495void
496CodeEmitterNV50::emitForm_ADD(const Instruction *i)
497{
498   assert(i->encSize == 8);
499   code[0] |= 1;
500
501   emitFlagsRd(i);
502   emitFlagsWr(i);
503
504   setDst(i, 0);
505
506   setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
507   setSrc(i, 0, 0);
508   setSrc(i, 1, 2);
509
510   setAReg16(i, 1);
511}
512
513// default short form (rr, ar, rc, gr)
514void
515CodeEmitterNV50::emitForm_MUL(const Instruction *i)
516{
517   assert(i->encSize == 4 && !(code[0] & 1));
518   assert(i->defExists(0));
519   assert(!i->getPredicate());
520
521   setDst(i, 0);
522
523   setSrcFileBits(i, NV50_OP_ENC_SHORT);
524   setSrc(i, 0, 0);
525   setSrc(i, 1, 1);
526}
527
528// usual immediate form
529// - 1 to 3 sources where last is immediate (rir, gir)
530// - no address or predicate possible
531void
532CodeEmitterNV50::emitForm_IMM(const Instruction *i)
533{
534   assert(i->encSize == 8);
535   code[0] |= 1;
536
537   assert(i->defExists(0) && i->srcExists(0));
538
539   setDst(i, 0);
540
541   setSrcFileBits(i, NV50_OP_ENC_IMM);
542   if (Target::operationSrcNr[i->op] > 1) {
543      setSrc(i, 0, 0);
544      setImmediate(i, 1);
545      setSrc(i, 2, 1);
546   } else {
547      setImmediate(i, 0);
548   }
549}
550
551void
552CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
553{
554   uint8_t enc;
555
556   switch (ty) {
557   case TYPE_F32: // fall through
558   case TYPE_S32: // fall through
559   case TYPE_U32:  enc = 0x6; break;
560   case TYPE_B128: enc = 0x5; break;
561   case TYPE_F64: // fall through
562   case TYPE_S64: // fall through
563   case TYPE_U64:  enc = 0x4; break;
564   case TYPE_S16:  enc = 0x3; break;
565   case TYPE_U16:  enc = 0x2; break;
566   case TYPE_S8:   enc = 0x1; break;
567   case TYPE_U8:   enc = 0x0; break;
568   default:
569      enc = 0;
570      assert(!"invalid load/store type");
571      break;
572   }
573   code[pos / 32] |= enc << (pos % 32);
574}
575
576void
577CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
578{
579   switch (ty) {
580   case TYPE_U8: break;
581   case TYPE_U16: code[1] |= 0x4000; break;
582   case TYPE_S16: code[1] |= 0x8000; break;
583   case TYPE_F32:
584   case TYPE_S32:
585   case TYPE_U32: code[1] |= 0xc000; break;
586   default:
587      assert(0);
588      break;
589   }
590}
591
592void
593CodeEmitterNV50::emitLOAD(const Instruction *i)
594{
595   DataFile sf = i->src(0).getFile();
596   int32_t offset = i->getSrc(0)->reg.data.offset;
597
598   switch (sf) {
599   case FILE_SHADER_INPUT:
600      // use 'mov' where we can
601      code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
602      code[1] = 0x00200000 | (i->lanes << 14);
603      if (typeSizeof(i->dType) == 4)
604         code[1] |= 0x04000000;
605      break;
606   case FILE_MEMORY_SHARED:
607      if (targ->getChipset() >= 0x84) {
608         assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
609         code[0] = 0x10000001;
610         code[1] = 0x40000000;
611
612         if (typeSizeof(i->dType) == 4)
613            code[1] |= 0x04000000;
614
615         emitLoadStoreSizeCS(i->sType);
616      } else {
617         assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
618         code[0] = 0x10000001;
619         code[1] = 0x00200000 | (i->lanes << 14);
620         emitLoadStoreSizeCS(i->sType);
621      }
622      break;
623   case FILE_MEMORY_CONST:
624      code[0] = 0x10000001;
625      code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
626      if (typeSizeof(i->dType) == 4)
627         code[1] |= 0x04000000;
628      emitLoadStoreSizeCS(i->sType);
629      break;
630   case FILE_MEMORY_LOCAL:
631      code[0] = 0xd0000001;
632      code[1] = 0x40000000;
633      break;
634   case FILE_MEMORY_GLOBAL:
635      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
636      code[1] = 0x80000000;
637      break;
638   default:
639      assert(!"invalid load source file");
640      break;
641   }
642   if (sf == FILE_MEMORY_LOCAL ||
643       sf == FILE_MEMORY_GLOBAL)
644      emitLoadStoreSizeLG(i->sType, 21 + 32);
645
646   setDst(i, 0);
647
648   emitFlagsRd(i);
649   emitFlagsWr(i);
650
651   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
652      srcId(*i->src(0).getIndirect(0), 9);
653   } else {
654      setAReg16(i, 0);
655      srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
656   }
657}
658
659void
660CodeEmitterNV50::emitSTORE(const Instruction *i)
661{
662   DataFile f = i->getSrc(0)->reg.file;
663   int32_t offset = i->getSrc(0)->reg.data.offset;
664
665   switch (f) {
666   case FILE_SHADER_OUTPUT:
667      code[0] = 0x00000001 | ((offset >> 2) << 9);
668      code[1] = 0x80c00000;
669      srcId(i->src(1), 32 + 14);
670      break;
671   case FILE_MEMORY_GLOBAL:
672      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
673      code[1] = 0xa0000000;
674      emitLoadStoreSizeLG(i->dType, 21 + 32);
675      srcId(i->src(1), 2);
676      break;
677   case FILE_MEMORY_LOCAL:
678      code[0] = 0xd0000001;
679      code[1] = 0x60000000;
680      emitLoadStoreSizeLG(i->dType, 21 + 32);
681      srcId(i->src(1), 2);
682      break;
683   case FILE_MEMORY_SHARED:
684      code[0] = 0x00000001;
685      code[1] = 0xe0000000;
686      switch (typeSizeof(i->dType)) {
687      case 1:
688         code[0] |= offset << 9;
689         code[1] |= 0x00400000;
690         break;
691      case 2:
692         code[0] |= (offset >> 1) << 9;
693         break;
694      case 4:
695         code[0] |= (offset >> 2) << 9;
696         code[1] |= 0x04200000;
697         break;
698      default:
699         assert(0);
700         break;
701      }
702      srcId(i->src(1), 32 + 14);
703      break;
704   default:
705      assert(!"invalid store destination file");
706      break;
707   }
708
709   if (f == FILE_MEMORY_GLOBAL)
710      srcId(*i->src(0).getIndirect(0), 9);
711   else
712      setAReg16(i, 0);
713
714   if (f == FILE_MEMORY_LOCAL)
715      srcAddr16(i->src(0), false, 9);
716
717   emitFlagsRd(i);
718}
719
720void
721CodeEmitterNV50::emitMOV(const Instruction *i)
722{
723   DataFile sf = i->getSrc(0)->reg.file;
724   DataFile df = i->getDef(0)->reg.file;
725
726   assert(sf == FILE_GPR || df == FILE_GPR);
727
728   if (sf == FILE_FLAGS) {
729      code[0] = 0x00000001;
730      code[1] = 0x20000000;
731      defId(i->def(0), 2);
732      srcId(i->src(0), 12);
733      emitFlagsRd(i);
734   } else
735   if (sf == FILE_ADDRESS) {
736      code[0] = 0x00000001;
737      code[1] = 0x40000000;
738      defId(i->def(0), 2);
739      setARegBits(SDATA(i->src(0)).id + 1);
740      emitFlagsRd(i);
741   } else
742   if (df == FILE_FLAGS) {
743      code[0] = 0x00000001;
744      code[1] = 0xa0000000;
745      defId(i->def(0), 4);
746      srcId(i->src(0), 9);
747      emitFlagsRd(i);
748   } else
749   if (sf == FILE_IMMEDIATE) {
750      code[0] = 0x10008001;
751      code[1] = 0x00000003;
752      emitForm_IMM(i);
753   } else {
754      if (i->encSize == 4) {
755         code[0] = 0x10008000;
756      } else {
757         code[0] = 0x10000001;
758         code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
759         code[1] |= (i->lanes << 14);
760         emitFlagsRd(i);
761      }
762      defId(i->def(0), 2);
763      srcId(i->src(0), 9);
764   }
765   if (df == FILE_SHADER_OUTPUT) {
766      assert(i->encSize == 8);
767      code[1] |= 0x8;
768   }
769}
770
771void
772CodeEmitterNV50::emitNOP()
773{
774   code[0] = 0xf0000001;
775   code[1] = 0xe0000000;
776}
777
778void
779CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
780{
781   code[0] = 0xc0000000 | (lane << 16);
782   code[1] = 0x80000000;
783
784   code[0] |= (quOp & 0x03) << 20;
785   code[1] |= (quOp & 0xfc) << 20;
786
787   emitForm_ADD(i);
788
789   if (!i->srcExists(1))
790      srcId(i->src(0), 32 + 14);
791}
792
793void
794CodeEmitterNV50::emitPFETCH(const Instruction *i)
795{
796   code[0] = 0x11800001;
797   code[1] = 0x04200000 | (0xf << 14);
798
799   defId(i->def(0), 2);
800   srcAddr8(i->src(0), 9);
801   setAReg16(i, 0);
802}
803
804void
805CodeEmitterNV50::emitINTERP(const Instruction *i)
806{
807   code[0] = 0x80000000;
808
809   defId(i->def(0), 2);
810   srcAddr8(i->src(0), 16);
811
812   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
813      code[0] |= 1 << 8;
814   } else {
815      if (i->op == OP_PINTERP) {
816         code[0] |= 1 << 25;
817         srcId(i->src(1), 9);
818      }
819      if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
820         code[0] |= 1 << 24;
821   }
822
823   if (i->encSize == 8) {
824      code[1] =
825         (code[0] & (3 << 24)) >> (24 - 16) |
826         (code[0] & (1 <<  8)) << (18 -  8);
827      code[0] &= ~0x03000100;
828      code[0] |= 1;
829      emitFlagsRd(i);
830   }
831}
832
833void
834CodeEmitterNV50::emitMINMAX(const Instruction *i)
835{
836   if (i->dType == TYPE_F64) {
837      code[0] = 0xe0000000;
838      code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
839   } else {
840      code[0] = 0x30000000;
841      code[1] = 0x80000000;
842      if (i->op == OP_MIN)
843         code[1] |= 0x20000000;
844
845      switch (i->dType) {
846      case TYPE_F32: code[0] |= 0x80000000; break;
847      case TYPE_S32: code[1] |= 0x8c000000; break;
848      case TYPE_U32: code[1] |= 0x84000000; break;
849      case TYPE_S16: code[1] |= 0x80000000; break;
850      case TYPE_U16: break;
851      default:
852         assert(0);
853         break;
854      }
855      code[1] |= i->src(0).mod.abs() << 20;
856      code[1] |= i->src(1).mod.abs() << 19;
857   }
858   emitForm_MAD(i);
859}
860
861void
862CodeEmitterNV50::emitFMAD(const Instruction *i)
863{
864   const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
865   const int neg_add = i->src(2).mod.neg();
866
867   code[0] = 0xe0000000;
868
869   if (i->encSize == 4) {
870      emitForm_MUL(i);
871      assert(!neg_mul && !neg_add);
872   } else {
873      code[1]  = neg_mul << 26;
874      code[1] |= neg_add << 27;
875      if (i->saturate)
876         code[1] |= 1 << 29;
877      emitForm_MAD(i);
878   }
879}
880
881void
882CodeEmitterNV50::emitFADD(const Instruction *i)
883{
884   const int neg0 = i->src(0).mod.neg();
885   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
886
887   code[0] = 0xb0000000;
888
889   assert(!(i->src(0).mod | i->src(1).mod).abs());
890
891   if (i->src(1).getFile() == FILE_IMMEDIATE) {
892      code[1] = 0;
893      emitForm_IMM(i);
894      code[0] |= neg0 << 15;
895      code[0] |= neg1 << 22;
896      if (i->saturate)
897         code[0] |= 1 << 8;
898   } else
899   if (i->encSize == 8) {
900      code[1] = 0;
901      emitForm_ADD(i);
902      code[1] |= neg0 << 26;
903      code[1] |= neg1 << 27;
904      if (i->saturate)
905         code[1] |= 1 << 29;
906   } else {
907      emitForm_MUL(i);
908      code[0] |= neg0 << 15;
909      code[0] |= neg1 << 22;
910      if (i->saturate)
911         code[0] |= 1 << 8;
912   }
913}
914
915void
916CodeEmitterNV50::emitUADD(const Instruction *i)
917{
918   const int neg0 = i->src(0).mod.neg();
919   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
920
921   code[0] = 0x20008000;
922
923   if (i->src(1).getFile() == FILE_IMMEDIATE) {
924      code[1] = 0;
925      emitForm_IMM(i);
926   } else
927   if (i->encSize == 8) {
928      code[0] = 0x20000000;
929      code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
930      emitForm_ADD(i);
931   } else {
932      emitForm_MUL(i);
933   }
934   assert(!(neg0 && neg1));
935   code[0] |= neg0 << 28;
936   code[0] |= neg1 << 22;
937
938   if (i->flagsSrc >= 0) {
939      // addc == sub | subr
940      assert(!(code[0] & 0x10400000) && !i->getPredicate());
941      code[0] |= 0x10400000;
942      srcId(i->src(i->flagsSrc), 32 + 12);
943   }
944}
945
946void
947CodeEmitterNV50::emitAADD(const Instruction *i)
948{
949   const int s = (i->op == OP_MOV) ? 0 : 1;
950
951   code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
952   code[1] = 0x20000000;
953
954   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
955
956   emitFlagsRd(i);
957
958   if (s && i->srcExists(0))
959      setARegBits(SDATA(i->src(0)).id + 1);
960}
961
962void
963CodeEmitterNV50::emitIMUL(const Instruction *i)
964{
965   code[0] = 0x40000000;
966
967   if (i->encSize == 8) {
968      code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
969      emitForm_MAD(i);
970   } else {
971      if (i->sType == TYPE_S16)
972         code[0] |= 0x8100;
973      emitForm_MUL(i);
974   }
975}
976
977void
978CodeEmitterNV50::emitFMUL(const Instruction *i)
979{
980   const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
981
982   code[0] = 0xc0000000;
983
984   if (i->src(1).getFile() == FILE_IMMEDIATE) {
985      code[1] = 0;
986      emitForm_IMM(i);
987      if (neg)
988         code[0] |= 0x8000;
989   } else
990   if (i->encSize == 8) {
991      code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
992      if (neg)
993         code[1] |= 0x08000000;
994      emitForm_MAD(i);
995   } else {
996      emitForm_MUL(i);
997      if (neg)
998         code[0] |= 0x8000;
999   }
1000}
1001
1002void
1003CodeEmitterNV50::emitIMAD(const Instruction *i)
1004{
1005   code[0] = 0x60000000;
1006   if (isSignedType(i->sType))
1007      code[1] = i->saturate ? 0x40000000 : 0x20000000;
1008   else
1009      code[1] = 0x00000000;
1010
1011   int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1012   int neg2 = i->src(2).mod.neg();
1013
1014   assert(!(neg1 & neg2));
1015   code[1] |= neg1 << 27;
1016   code[1] |= neg2 << 26;
1017
1018   emitForm_MAD(i);
1019
1020   if (i->flagsSrc >= 0) {
1021      // add with carry from $cX
1022      assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1023      code[1] |= 0xc << 24;
1024      srcId(i->src(i->flagsSrc), 32 + 12);
1025   }
1026}
1027
1028void
1029CodeEmitterNV50::emitISAD(const Instruction *i)
1030{
1031   if (i->encSize == 8) {
1032      code[0] = 0x50000000;
1033      switch (i->sType) {
1034      case TYPE_U32: code[1] = 0x04000000; break;
1035      case TYPE_S32: code[1] = 0x0c000000; break;
1036      case TYPE_U16: code[1] = 0x00000000; break;
1037      case TYPE_S16: code[1] = 0x08000000; break;
1038      default:
1039         assert(0);
1040         break;
1041      }
1042      emitForm_MAD(i);
1043   } else {
1044      switch (i->sType) {
1045      case TYPE_U32: code[0] = 0x50008000; break;
1046      case TYPE_S32: code[0] = 0x50008100; break;
1047      case TYPE_U16: code[0] = 0x50000000; break;
1048      case TYPE_S16: code[0] = 0x50000100; break;
1049      default:
1050         assert(0);
1051         break;
1052      }
1053      emitForm_MUL(i);
1054   }
1055}
1056
1057void
1058CodeEmitterNV50::emitSET(const Instruction *i)
1059{
1060   code[0] = 0x30000000;
1061   code[1] = 0x60000000;
1062
1063   emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1064
1065   switch (i->sType) {
1066   case TYPE_F32: code[0] |= 0x80000000; break;
1067   case TYPE_S32: code[1] |= 0x0c000000; break;
1068   case TYPE_U32: code[1] |= 0x04000000; break;
1069   case TYPE_S16: code[1] |= 0x08000000; break;
1070   case TYPE_U16: break;
1071   default:
1072      assert(0);
1073      break;
1074   }
1075   if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1076   if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1077   if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1078   if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1079
1080   emitForm_MAD(i);
1081}
1082
1083void
1084CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1085{
1086   switch (rnd) {
1087   case ROUND_NI: code[1] |= 0x08000000; break;
1088   case ROUND_M:  code[1] |= 0x00020000; break;
1089   case ROUND_MI: code[1] |= 0x08020000; break;
1090   case ROUND_P:  code[1] |= 0x00040000; break;
1091   case ROUND_PI: code[1] |= 0x08040000; break;
1092   case ROUND_Z:  code[1] |= 0x00060000; break;
1093   case ROUND_ZI: code[1] |= 0x08060000; break;
1094   default:
1095      assert(rnd == ROUND_N);
1096      break;
1097   }
1098}
1099
1100void
1101CodeEmitterNV50::emitCVT(const Instruction *i)
1102{
1103   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1104   RoundMode rnd;
1105
1106   switch (i->op) {
1107   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1108   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1109   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1110   default:
1111      rnd = i->rnd;
1112      break;
1113   }
1114
1115   code[0] = 0xa0000000;
1116
1117   switch (i->dType) {
1118   case TYPE_F64:
1119      switch (i->sType) {
1120      case TYPE_F64: code[1] = 0xc4404000; break;
1121      case TYPE_S64: code[1] = 0x44414000; break;
1122      case TYPE_U64: code[1] = 0x44404000; break;
1123      case TYPE_F32: code[1] = 0xc4400000; break;
1124      case TYPE_S32: code[1] = 0x44410000; break;
1125      case TYPE_U32: code[1] = 0x44400000; break;
1126      default:
1127         assert(0);
1128         break;
1129      }
1130      break;
1131   case TYPE_S64:
1132      switch (i->sType) {
1133      case TYPE_F64: code[1] = 0x8c404000; break;
1134      case TYPE_F32: code[1] = 0x8c400000; break;
1135      default:
1136         assert(0);
1137         break;
1138      }
1139      break;
1140   case TYPE_U64:
1141      switch (i->sType) {
1142      case TYPE_F64: code[1] = 0x84404000; break;
1143      case TYPE_F32: code[1] = 0x84400000; break;
1144      default:
1145         assert(0);
1146         break;
1147      }
1148      break;
1149   case TYPE_F32:
1150      switch (i->sType) {
1151      case TYPE_F64: code[1] = 0xc0404000; break;
1152      case TYPE_S64: code[1] = 0x40414000; break;
1153      case TYPE_U64: code[1] = 0x40404000; break;
1154      case TYPE_F32: code[1] = 0xc4004000; break;
1155      case TYPE_S32: code[1] = 0x44014000; break;
1156      case TYPE_U32: code[1] = 0x44004000; break;
1157      case TYPE_F16: code[1] = 0xc4000000; break;
1158      default:
1159         assert(0);
1160         break;
1161      }
1162      break;
1163   case TYPE_S32:
1164      switch (i->sType) {
1165      case TYPE_F64: code[1] = 0x88404000; break;
1166      case TYPE_F32: code[1] = 0x8c004000; break;
1167      case TYPE_S32: code[1] = 0x0c014000; break;
1168      case TYPE_U32: code[1] = 0x0c004000; break;
1169      case TYPE_F16: code[1] = 0x8c000000; break;
1170      case TYPE_S16: code[1] = 0x0c010000; break;
1171      case TYPE_U16: code[1] = 0x0c000000; break;
1172      case TYPE_S8:  code[1] = 0x0c018000; break;
1173      case TYPE_U8:  code[1] = 0x0c008000; break;
1174      default:
1175         assert(0);
1176         break;
1177      }
1178      break;
1179   case TYPE_U32:
1180      switch (i->sType) {
1181      case TYPE_F64: code[1] = 0x80404000; break;
1182      case TYPE_F32: code[1] = 0x84004000; break;
1183      case TYPE_S32: code[1] = 0x04014000; break;
1184      case TYPE_U32: code[1] = 0x04004000; break;
1185      case TYPE_F16: code[1] = 0x84000000; break;
1186      case TYPE_S16: code[1] = 0x04010000; break;
1187      case TYPE_U16: code[1] = 0x04000000; break;
1188      case TYPE_S8:  code[1] = 0x04018000; break;
1189      case TYPE_U8:  code[1] = 0x04008000; break;
1190      default:
1191         assert(0);
1192         break;
1193      }
1194      break;
1195   case TYPE_S16:
1196   case TYPE_U16:
1197   case TYPE_S8:
1198   case TYPE_U8:
1199   default:
1200      assert(0);
1201      break;
1202   }
1203   if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1204      code[1] |= 0x00004000;
1205
1206   roundMode_CVT(rnd);
1207
1208   switch (i->op) {
1209   case OP_ABS: code[1] |= 1 << 20; break;
1210   case OP_SAT: code[1] |= 1 << 19; break;
1211   case OP_NEG: code[1] |= 1 << 29; break;
1212   default:
1213      break;
1214   }
1215   code[1] ^= i->src(0).mod.neg() << 29;
1216   code[1] |= i->src(0).mod.abs() << 20;
1217   if (i->saturate)
1218      code[1] |= 1 << 19;
1219
1220   assert(i->op != OP_ABS || !i->src(0).mod.neg());
1221
1222   emitForm_MAD(i);
1223}
1224
1225void
1226CodeEmitterNV50::emitPreOp(const Instruction *i)
1227{
1228   code[0] = 0xb0000000;
1229   code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1230
1231   code[1] |= i->src(0).mod.abs() << 20;
1232   code[1] |= i->src(0).mod.neg() << 26;
1233
1234   emitForm_MAD(i);
1235}
1236
1237void
1238CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1239{
1240   code[0] = 0x90000000;
1241
1242   if (i->encSize == 4) {
1243      assert(i->op == OP_RCP);
1244      code[0] |= i->src(0).mod.abs() << 15;
1245      code[0] |= i->src(0).mod.neg() << 22;
1246      emitForm_MUL(i);
1247   } else {
1248      code[1] = subOp << 29;
1249      code[1] |= i->src(0).mod.abs() << 20;
1250      code[1] |= i->src(0).mod.neg() << 26;
1251      emitForm_MAD(i);
1252   }
1253}
1254
1255void
1256CodeEmitterNV50::emitNOT(const Instruction *i)
1257{
1258   code[0] = 0xd0000000;
1259   code[1] = 0x0002c000;
1260
1261   switch (i->sType) {
1262   case TYPE_U32:
1263   case TYPE_S32:
1264      code[1] |= 0x04000000;
1265      break;
1266   default:
1267      break;
1268   }
1269   emitForm_MAD(i);
1270   setSrc(i, 0, 1);
1271}
1272
1273void
1274CodeEmitterNV50::emitLogicOp(const Instruction *i)
1275{
1276   code[0] = 0xd0000000;
1277   code[1] = 0;
1278
1279   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1280      switch (i->op) {
1281      case OP_OR:  code[0] |= 0x0100; break;
1282      case OP_XOR: code[0] |= 0x8000; break;
1283      default:
1284         assert(i->op == OP_AND);
1285         break;
1286      }
1287      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1288         code[0] |= 1 << 22;
1289
1290      emitForm_IMM(i);
1291   } else {
1292      switch (i->op) {
1293      case OP_AND: code[1] = 0x04000000; break;
1294      case OP_OR:  code[1] = 0x04004000; break;
1295      case OP_XOR: code[1] = 0x04008000; break;
1296      default:
1297         assert(0);
1298         break;
1299      }
1300      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1301         code[1] |= 1 << 16;
1302      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1303         code[1] |= 1 << 17;
1304
1305      emitForm_MAD(i);
1306   }
1307}
1308
1309void
1310CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1311{
1312   code[0] = 0x00000001 | (shl << 16);
1313   code[1] = 0xc0000000;
1314
1315   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1316
1317   setSrcFileBits(i, NV50_OP_ENC_IMM);
1318   setSrc(i, 0, 0);
1319   emitFlagsRd(i);
1320}
1321
1322void
1323CodeEmitterNV50::emitShift(const Instruction *i)
1324{
1325   if (i->def(0).getFile() == FILE_ADDRESS) {
1326      assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1327      emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1328   } else {
1329      code[0] = 0x30000001;
1330      code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1331      if (i->op == OP_SHR && isSignedType(i->sType))
1332          code[1] |= 1 << 27;
1333
1334      if (i->src(1).getFile() == FILE_IMMEDIATE) {
1335         code[1] |= 1 << 20;
1336         code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1337         defId(i->def(0), 2);
1338         srcId(i->src(0), 9);
1339         emitFlagsRd(i);
1340      } else {
1341         emitForm_MAD(i);
1342      }
1343   }
1344}
1345
1346void
1347CodeEmitterNV50::emitOUT(const Instruction *i)
1348{
1349   code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
1350   code[1] = 0xc0000001;
1351
1352   emitFlagsRd(i);
1353}
1354
1355void
1356CodeEmitterNV50::emitTEX(const TexInstruction *i)
1357{
1358   code[0] = 0xf0000001;
1359   code[1] = 0x00000000;
1360
1361   switch (i->op) {
1362   case OP_TXB:
1363      code[1] = 0x20000000;
1364      break;
1365   case OP_TXL:
1366      code[1] = 0x40000000;
1367      break;
1368   case OP_TXF:
1369      code[0] |= 0x01000000;
1370      break;
1371   case OP_TXG:
1372      code[0] = 0x01000000;
1373      code[1] = 0x80000000;
1374      break;
1375   default:
1376      assert(i->op == OP_TEX);
1377      break;
1378   }
1379
1380   code[0] |= i->tex.r << 9;
1381   code[0] |= i->tex.s << 17;
1382
1383   int argc = i->tex.target.getArgCount();
1384
1385   if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1386      argc += 1;
1387   if (i->tex.target.isShadow())
1388      argc += 1;
1389   assert(argc <= 4);
1390
1391   code[0] |= (argc - 1) << 22;
1392
1393   if (i->tex.target.isCube()) {
1394      code[0] |= 0x08000000;
1395   } else
1396   if (i->tex.useOffsets) {
1397      code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
1398      code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
1399      code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
1400   }
1401
1402   code[0] |= (i->tex.mask & 0x3) << 25;
1403   code[1] |= (i->tex.mask & 0xc) << 12;
1404
1405   if (i->tex.liveOnly)
1406      code[1] |= 4;
1407
1408   defId(i->def(0), 2);
1409
1410   emitFlagsRd(i);
1411}
1412
1413void
1414CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1415{
1416   assert(i->tex.query == TXQ_DIMS);
1417
1418   code[0] = 0xf0000001;
1419   code[1] = 0x60000000;
1420
1421   code[0] |= i->tex.r << 9;
1422   code[0] |= i->tex.s << 17;
1423
1424   code[0] |= (i->tex.mask & 0x3) << 25;
1425   code[1] |= (i->tex.mask & 0xc) << 12;
1426
1427   defId(i->def(0), 2);
1428
1429   emitFlagsRd(i);
1430}
1431
1432void
1433CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1434{
1435   uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1436
1437   code[0] = 0x10000003; // bra
1438   code[1] = 0x00000780; // always
1439
1440   switch (i->subOp) {
1441   case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1442      break;
1443   case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1444      pos += 8;
1445      break;
1446   default:
1447      assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1448      code[0] = 0x20000003; // call
1449      code[1] = 0x00000000; // no predicate
1450      break;
1451   }
1452   addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1453   addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1454}
1455
1456void
1457CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1458{
1459   const FlowInstruction *f = i->asFlow();
1460   bool hasPred = false;
1461   bool hasTarg = false;
1462
1463   code[0] = 0x00000003 | (flowOp << 28);
1464   code[1] = 0x00000000;
1465
1466   switch (i->op) {
1467   case OP_BRA:
1468      hasPred = true;
1469      hasTarg = true;
1470      break;
1471   case OP_BREAK:
1472   case OP_BRKPT:
1473   case OP_DISCARD:
1474   case OP_RET:
1475      hasPred = true;
1476      break;
1477   case OP_CALL:
1478   case OP_PREBREAK:
1479   case OP_JOINAT:
1480      hasTarg = true;
1481      break;
1482   case OP_PRERET:
1483      hasTarg = true;
1484      if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1485         emitPRERETEmu(f);
1486         return;
1487      }
1488      break;
1489   default:
1490      break;
1491   }
1492
1493   if (hasPred)
1494      emitFlagsRd(i);
1495
1496   if (hasTarg && f) {
1497      uint32_t pos;
1498
1499      if (f->op == OP_CALL) {
1500         if (f->builtin) {
1501            pos = targ->getBuiltinOffset(f->target.builtin);
1502         } else {
1503            pos = f->target.fn->binPos;
1504         }
1505      } else {
1506         pos = f->target.bb->binPos;
1507      }
1508
1509      code[0] |= ((pos >>  2) & 0xffff) << 11;
1510      code[1] |= ((pos >> 18) & 0x003f) << 14;
1511
1512      RelocEntry::Type relocTy;
1513
1514      relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1515
1516      addReloc(relocTy, 0, pos, 0x07fff800, 9);
1517      addReloc(relocTy, 1, pos, 0x000fc000, -4);
1518   }
1519}
1520
1521bool
1522CodeEmitterNV50::emitInstruction(Instruction *insn)
1523{
1524   if (!insn->encSize) {
1525      ERROR("skipping unencodable instruction: "); insn->print();
1526      return false;
1527   } else
1528   if (codeSize + insn->encSize > codeSizeLimit) {
1529      ERROR("code emitter output buffer too small\n");
1530      return false;
1531   }
1532
1533   if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1534      INFO("EMIT: "); insn->print();
1535   }
1536
1537   switch (insn->op) {
1538   case OP_MOV:
1539      emitMOV(insn);
1540      break;
1541   case OP_EXIT:
1542   case OP_NOP:
1543   case OP_JOIN:
1544      emitNOP();
1545      break;
1546   case OP_VFETCH:
1547   case OP_LOAD:
1548      emitLOAD(insn);
1549      break;
1550   case OP_EXPORT:
1551   case OP_STORE:
1552      emitSTORE(insn);
1553      break;
1554   case OP_PFETCH:
1555      emitPFETCH(insn);
1556      break;
1557   case OP_LINTERP:
1558   case OP_PINTERP:
1559      emitINTERP(insn);
1560      break;
1561   case OP_ADD:
1562   case OP_SUB:
1563      if (isFloatType(insn->dType))
1564         emitFADD(insn);
1565      else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1566         emitAADD(insn);
1567      else
1568         emitUADD(insn);
1569      break;
1570   case OP_MUL:
1571      if (isFloatType(insn->dType))
1572         emitFMUL(insn);
1573      else
1574         emitIMUL(insn);
1575      break;
1576   case OP_MAD:
1577   case OP_FMA:
1578      if (isFloatType(insn->dType))
1579         emitFMAD(insn);
1580      else
1581         emitIMAD(insn);
1582      break;
1583   case OP_SAD:
1584      emitISAD(insn);
1585      break;
1586   case OP_NOT:
1587      emitNOT(insn);
1588      break;
1589   case OP_AND:
1590   case OP_OR:
1591   case OP_XOR:
1592      emitLogicOp(insn);
1593      break;
1594   case OP_SHL:
1595   case OP_SHR:
1596      emitShift(insn);
1597      break;
1598   case OP_SET:
1599      emitSET(insn);
1600      break;
1601   case OP_MIN:
1602   case OP_MAX:
1603      emitMINMAX(insn);
1604      break;
1605   case OP_CEIL:
1606   case OP_FLOOR:
1607   case OP_TRUNC:
1608   case OP_ABS:
1609   case OP_NEG:
1610   case OP_SAT:
1611      emitCVT(insn);
1612      break;
1613   case OP_CVT:
1614      if (insn->def(0).getFile() == FILE_ADDRESS)
1615         emitARL(insn, 0);
1616      else
1617      if (insn->def(0).getFile() == FILE_FLAGS ||
1618          insn->src(0).getFile() == FILE_FLAGS ||
1619          insn->src(0).getFile() == FILE_ADDRESS)
1620         emitMOV(insn);
1621      else
1622         emitCVT(insn);
1623      break;
1624   case OP_RCP:
1625      emitSFnOp(insn, 0);
1626      break;
1627   case OP_RSQ:
1628      emitSFnOp(insn, 2);
1629      break;
1630   case OP_LG2:
1631      emitSFnOp(insn, 3);
1632      break;
1633   case OP_SIN:
1634      emitSFnOp(insn, 4);
1635      break;
1636   case OP_COS:
1637      emitSFnOp(insn, 5);
1638      break;
1639   case OP_EX2:
1640      emitSFnOp(insn, 6);
1641      break;
1642   case OP_PRESIN:
1643   case OP_PREEX2:
1644      emitPreOp(insn);
1645      break;
1646   case OP_TEX:
1647   case OP_TXB:
1648   case OP_TXL:
1649   case OP_TXF:
1650      emitTEX(insn->asTex());
1651      break;
1652   case OP_TXQ:
1653      emitTXQ(insn->asTex());
1654      break;
1655   case OP_EMIT:
1656   case OP_RESTART:
1657      emitOUT(insn);
1658      break;
1659   case OP_DISCARD:
1660      emitFlow(insn, 0x0);
1661      break;
1662   case OP_BRA:
1663      emitFlow(insn, 0x1);
1664      break;
1665   case OP_CALL:
1666      emitFlow(insn, 0x2);
1667      break;
1668   case OP_RET:
1669      emitFlow(insn, 0x3);
1670      break;
1671   case OP_PREBREAK:
1672      emitFlow(insn, 0x4);
1673      break;
1674   case OP_BREAK:
1675      emitFlow(insn, 0x5);
1676      break;
1677   case OP_QUADON:
1678      emitFlow(insn, 0x6);
1679      break;
1680   case OP_QUADPOP:
1681      emitFlow(insn, 0x7);
1682      break;
1683   case OP_JOINAT:
1684      emitFlow(insn, 0xa);
1685      break;
1686   case OP_PRERET:
1687      emitFlow(insn, 0xd);
1688      break;
1689   case OP_QUADOP:
1690      emitQUADOP(insn, insn->lanes, insn->subOp);
1691      break;
1692   case OP_DFDX:
1693      emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1694      break;
1695   case OP_DFDY:
1696      emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
1697      break;
1698   case OP_PHI:
1699   case OP_UNION:
1700   case OP_CONSTRAINT:
1701      ERROR("operation should have been eliminated\n");
1702      return false;
1703   case OP_EXP:
1704   case OP_LOG:
1705   case OP_SQRT:
1706   case OP_POW:
1707   case OP_SELP:
1708   case OP_SLCT:
1709   case OP_TXD:
1710   case OP_PRECONT:
1711   case OP_CONT:
1712   case OP_POPCNT:
1713   case OP_INSBF:
1714   case OP_EXTBF:
1715      ERROR("operation should have been lowered\n");
1716      return false;
1717   default:
1718      ERROR("unknown op: %u\n", insn->op);
1719      return false;
1720   }
1721   if (insn->join || insn->op == OP_JOIN)
1722      code[1] |= 0x2;
1723   else
1724   if (insn->exit || insn->op == OP_EXIT)
1725      code[1] |= 0x1;
1726
1727   assert((insn->encSize == 8) == (code[0] & 1));
1728
1729   code += insn->encSize / 4;
1730   codeSize += insn->encSize;
1731   return true;
1732}
1733
1734uint32_t
1735CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
1736{
1737   const Target::OpInfo &info = targ->getOpInfo(i);
1738
1739   if (info.minEncSize > 4)
1740      return 8;
1741
1742   // check constraints on dst and src operands
1743   for (int d = 0; i->defExists(d); ++d) {
1744      if (i->def(d).rep()->reg.data.id > 63 ||
1745          i->def(d).rep()->reg.file != FILE_GPR)
1746         return 8;
1747   }
1748
1749   for (int s = 0; i->srcExists(s); ++s) {
1750      DataFile sf = i->src(s).getFile();
1751      if (sf != FILE_GPR)
1752         if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
1753            return 8;
1754      if (i->src(s).rep()->reg.data.id > 63)
1755         return 8;
1756   }
1757
1758   // check modifiers & rounding
1759   if (i->join || i->lanes != 0xf || i->exit)
1760      return 8;
1761   if (i->op == OP_MUL && i->rnd != ROUND_N)
1762      return 8;
1763
1764   if (i->asTex())
1765      return 8; // TODO: short tex encoding
1766
1767   // check constraints on short MAD
1768   if (info.srcNr >= 2 && i->srcExists(2)) {
1769      if (i->saturate || i->src(2).mod)
1770         return 8;
1771      if ((i->src(0).mod ^ i->src(1).mod) ||
1772          (i->src(0).mod | i->src(1).mod).abs())
1773         return 8;
1774      if (!i->defExists(0) ||
1775          i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
1776         return 8;
1777   }
1778
1779   return info.minEncSize;
1780}
1781
1782// Change the encoding size of an instruction after BBs have been scheduled.
1783static void
1784makeInstructionLong(Instruction *insn)
1785{
1786   if (insn->encSize == 8)
1787      return;
1788   Function *fn = insn->bb->getFunction();
1789   int n = 0;
1790   int adj = 4;
1791
1792   for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
1793
1794   if (n & 1) {
1795      adj = 8;
1796      insn->next->encSize = 8;
1797   } else
1798   if (insn->prev && insn->prev->encSize == 4) {
1799      adj = 8;
1800      insn->prev->encSize = 8;
1801   }
1802   insn->encSize = 8;
1803
1804   for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
1805      fn->bbArray[i]->binPos += 4;
1806   }
1807   fn->binSize += adj;
1808   insn->bb->binSize += adj;
1809}
1810
1811static bool
1812trySetExitModifier(Instruction *insn)
1813{
1814   if (insn->op == OP_DISCARD ||
1815       insn->op == OP_QUADON ||
1816       insn->op == OP_QUADPOP)
1817      return false;
1818   for (int s = 0; insn->srcExists(s); ++s)
1819      if (insn->src(s).getFile() == FILE_IMMEDIATE)
1820         return false;
1821   if (insn->asFlow()) {
1822      if (insn->op == OP_CALL) // side effects !
1823         return false;
1824      if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
1825         return false;
1826      insn->op = OP_EXIT;
1827   }
1828   insn->exit = 1;
1829   makeInstructionLong(insn);
1830   return true;
1831}
1832
1833static void
1834replaceExitWithModifier(Function *func)
1835{
1836   BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
1837
1838   if (!epilogue->getExit() ||
1839       epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
1840      return;
1841
1842   if (epilogue->getEntry()->op != OP_EXIT) {
1843      Instruction *insn = epilogue->getExit()->prev;
1844      if (!insn || !trySetExitModifier(insn))
1845         return;
1846      insn->exit = 1;
1847   } else {
1848      for (Graph::EdgeIterator ei = func->cfgExit->incident();
1849           !ei.end(); ei.next()) {
1850         BasicBlock *bb = BasicBlock::get(ei.getNode());
1851         Instruction *i = bb->getExit();
1852
1853         if (!i || !trySetExitModifier(i))
1854            return;
1855      }
1856   }
1857   epilogue->binSize -= 8;
1858   func->binSize -= 8;
1859   delete_Instruction(func->getProgram(), epilogue->getExit());
1860}
1861
1862void
1863CodeEmitterNV50::prepareEmission(Function *func)
1864{
1865   CodeEmitter::prepareEmission(func);
1866
1867   replaceExitWithModifier(func);
1868}
1869
1870CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : CodeEmitter(target)
1871{
1872   targ = target; // specialized
1873   code = NULL;
1874   codeSize = codeSizeLimit = 0;
1875   relocInfo = NULL;
1876}
1877
1878CodeEmitter *
1879TargetNV50::getCodeEmitter(Program::Type type)
1880{
1881   CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
1882   emit->setProgramType(type);
1883   return emit;
1884}
1885
1886} // namespace nv50_ir
1887