nv50_ir_emit_nv50.cpp revision f3a7be740dd9658097c2518a3bc6ec4ec197ad70
1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "nv50_ir.h"
24#include "nv50_ir_target_nv50.h"
25
26namespace nv50_ir {
27
// Encoding forms passed as the 'enc' argument of setSrcFileBits().
#define NV50_OP_ENC_LONG     0 // used by emitForm_MAD
#define NV50_OP_ENC_SHORT    1 // used by emitForm_MUL
#define NV50_OP_ENC_IMM      2 // used by emitForm_IMM
#define NV50_OP_ENC_LONG_ALT 3 // used by emitForm_ADD
32
33class CodeEmitterNV50 : public CodeEmitter
34{
35public:
36   CodeEmitterNV50(const TargetNV50 *);
37
38   virtual bool emitInstruction(Instruction *);
39
40   virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42   inline void setProgramType(Program::Type pType) { progType = pType; }
43
44   virtual void prepareEmission(Function *);
45
46private:
47   Program::Type progType;
48
49   const TargetNV50 *targ;
50
51private:
52   inline void defId(const ValueDef&, const int pos);
53   inline void srcId(const ValueRef&, const int pos);
54   inline void srcId(const ValueRef *, const int pos);
55
56   inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57   inline void srcAddr8(const ValueRef&, const int pos);
58
59   void emitFlagsRd(const Instruction *);
60   void emitFlagsWr(const Instruction *);
61
62   void emitCondCode(CondCode cc, DataType ty, int pos);
63
64   inline void setARegBits(unsigned int);
65
66   void setAReg16(const Instruction *, int s);
67   void setImmediate(const Instruction *, int s);
68
69   void setDst(const Value *);
70   void setDst(const Instruction *, int d);
71   void setSrcFileBits(const Instruction *, int enc);
72   void setSrc(const Instruction *, unsigned int s, int slot);
73
74   void emitForm_MAD(const Instruction *);
75   void emitForm_ADD(const Instruction *);
76   void emitForm_MUL(const Instruction *);
77   void emitForm_IMM(const Instruction *);
78
79   void emitLoadStoreSizeLG(DataType ty, int pos);
80   void emitLoadStoreSizeCS(DataType ty);
81
82   void roundMode_MAD(const Instruction *);
83   void roundMode_CVT(RoundMode);
84
85   void emitMNeg12(const Instruction *);
86
87   void emitLOAD(const Instruction *);
88   void emitSTORE(const Instruction *);
89   void emitMOV(const Instruction *);
90   void emitNOP();
91   void emitINTERP(const Instruction *);
92   void emitPFETCH(const Instruction *);
93   void emitOUT(const Instruction *);
94
95   void emitUADD(const Instruction *);
96   void emitAADD(const Instruction *);
97   void emitFADD(const Instruction *);
98   void emitIMUL(const Instruction *);
99   void emitFMUL(const Instruction *);
100   void emitFMAD(const Instruction *);
101   void emitIMAD(const Instruction *);
102   void emitISAD(const Instruction *);
103
104   void emitMINMAX(const Instruction *);
105
106   void emitPreOp(const Instruction *);
107   void emitSFnOp(const Instruction *, uint8_t subOp);
108
109   void emitShift(const Instruction *);
110   void emitARL(const Instruction *, unsigned int shl);
111   void emitLogicOp(const Instruction *);
112   void emitNOT(const Instruction *);
113
114   void emitCVT(const Instruction *);
115   void emitSET(const Instruction *);
116
117   void emitTEX(const TexInstruction *);
118   void emitTXQ(const TexInstruction *);
119
120   void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
121
122   void emitFlow(const Instruction *, uint8_t flowOp);
123   void emitPRERETEmu(const FlowInstruction *);
124};
125
// Register data of the representative value (rep()) of a source reference
// (SDATA) or of a definition (DDATA).  Both expand to the same expression;
// the two names only document which kind of operand is being read.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
128
129void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
130{
131   assert(src.get());
132   code[pos / 32] |= SDATA(src).id << (pos % 32);
133}
134
135void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
136{
137   assert(src->get());
138   code[pos / 32] |= SDATA(*src).id << (pos % 32);
139}
140
141void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
142{
143   assert(src.get());
144
145   int32_t offset = SDATA(src).offset;
146
147   assert(!adj || src.get()->reg.size <= 4);
148   if (adj)
149      offset /= src.get()->reg.size;
150
151   assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
152
153   if (offset < 0)
154      offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
155
156   code[pos / 32] |= offset << (pos % 32);
157}
158
159void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
160{
161   assert(src.get());
162
163   uint32_t offset = SDATA(src).offset;
164
165   assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
166
167   code[pos / 32] |= (offset >> 2) << (pos % 32);
168}
169
170void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
171{
172   assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
173
174   code[pos / 32] |= DDATA(def).id << (pos % 32);
175}
176
177void
178CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
179{
180   switch (insn->rnd) {
181   case ROUND_M: code[1] |= 1 << 22; break;
182   case ROUND_P: code[1] |= 2 << 22; break;
183   case ROUND_Z: code[1] |= 3 << 22; break;
184   default:
185      assert(insn->rnd == ROUND_N);
186      break;
187   }
188}
189
190void
191CodeEmitterNV50::emitMNeg12(const Instruction *i)
192{
193   code[1] |= i->src(0).mod.neg() << 26;
194   code[1] |= i->src(1).mod.neg() << 27;
195}
196
197void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
198{
199   uint8_t enc;
200
201   assert(pos >= 32 || pos <= 27);
202
203   switch (cc) {
204   case CC_LT:  enc = 0x1; break;
205   case CC_LTU: enc = 0x9; break;
206   case CC_EQ:  enc = 0x2; break;
207   case CC_EQU: enc = 0xa; break;
208   case CC_LE:  enc = 0x3; break;
209   case CC_LEU: enc = 0xb; break;
210   case CC_GT:  enc = 0x4; break;
211   case CC_GTU: enc = 0xc; break;
212   case CC_NE:  enc = 0x5; break;
213   case CC_NEU: enc = 0xd; break;
214   case CC_GE:  enc = 0x6; break;
215   case CC_GEU: enc = 0xe; break;
216   case CC_TR:  enc = 0xf; break;
217   case CC_FL:  enc = 0x0; break;
218
219   case CC_O:  enc = 0x10; break;
220   case CC_C:  enc = 0x11; break;
221   case CC_A:  enc = 0x12; break;
222   case CC_S:  enc = 0x13; break;
223   case CC_NS: enc = 0x1c; break;
224   case CC_NA: enc = 0x1d; break;
225   case CC_NC: enc = 0x1e; break;
226   case CC_NO: enc = 0x1f; break;
227
228   default:
229      enc = 0;
230      assert(!"invalid condition code");
231      break;
232   }
233   if (ty != TYPE_NONE && !isFloatType(ty))
234      enc &= ~0x8; // unordered only exists for float types
235
236   code[pos / 32] |= enc << (pos % 32);
237}
238
239void
240CodeEmitterNV50::emitFlagsRd(const Instruction *i)
241{
242   int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
243
244   assert(!(code[1] & 0x00003f80));
245
246   if (s >= 0) {
247      assert(i->getSrc(s)->reg.file == FILE_FLAGS);
248      emitCondCode(i->cc, TYPE_NONE, 32 + 7);
249      srcId(i->src(s), 32 + 12);
250   } else {
251      code[1] |= 0x0780;
252   }
253}
254
255void
256CodeEmitterNV50::emitFlagsWr(const Instruction *i)
257{
258   assert(!(code[1] & 0x70));
259
260   int flagsDef = i->flagsDef;
261
262   // find flags definition and check that it is the last def
263   if (flagsDef < 0) {
264      for (int d = 0; i->defExists(d); ++d)
265         if (i->def(d).getFile() == FILE_FLAGS)
266            flagsDef = d;
267      if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
268         WARN("Instruction::flagsDef was not set properly\n");
269   }
270   if (flagsDef == 0 && i->defExists(1))
271      WARN("flags def should not be the primary definition\n");
272
273   if (flagsDef >= 0)
274      code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
275
276}
277
278void
279CodeEmitterNV50::setARegBits(unsigned int u)
280{
281   code[0] |= (u & 3) << 26;
282   code[1] |= (u & 4);
283}
284
285void
286CodeEmitterNV50::setAReg16(const Instruction *i, int s)
287{
288   if (i->srcExists(s)) {
289      s = i->src(s).indirect[0];
290      if (s >= 0)
291         setARegBits(SDATA(i->src(s)).id + 1);
292   }
293}
294
295void
296CodeEmitterNV50::setImmediate(const Instruction *i, int s)
297{
298   const ImmediateValue *imm = i->src(s).get()->asImm();
299   assert(imm);
300
301   uint32_t u = imm->reg.data.u32;
302
303   if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
304      u = ~u;
305
306   code[1] |= 3;
307   code[0] |= (u & 0x3f) << 16;
308   code[1] |= (u >> 6) << 2;
309}
310
311void
312CodeEmitterNV50::setDst(const Value *dst)
313{
314   const Storage *reg = &dst->join->reg;
315
316   assert(reg->file != FILE_ADDRESS);
317
318   if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
319      code[0] |= (127 << 2) | 1;
320      code[1] |= 8;
321   } else {
322      int id;
323      if (reg->file == FILE_SHADER_OUTPUT) {
324         code[1] |= 8;
325         id = reg->data.offset / 4;
326      } else {
327         id = reg->data.id;
328      }
329      code[0] |= id << 2;
330   }
331}
332
333void
334CodeEmitterNV50::setDst(const Instruction *i, int d)
335{
336   if (i->defExists(d)) {
337      setDst(i->getDef(d));
338   } else
339   if (!d) {
340      code[0] |= 0x01fc; // bit bucket
341      code[1] |= 0x0008;
342   }
343}
344
345// 3 * 2 bits:
346// 0: r
347// 1: a/s
348// 2: c
349// 3: i
350void
351CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
352{
353   uint8_t mode = 0;
354
355   for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
356      switch (i->src(s).getFile()) {
357      case FILE_GPR:
358         break;
359      case FILE_MEMORY_SHARED:
360      case FILE_SHADER_INPUT:
361         mode |= 1 << (s * 2);
362         break;
363      case FILE_MEMORY_CONST:
364         mode |= 2 << (s * 2);
365         break;
366      case FILE_IMMEDIATE:
367         mode |= 3 << (s * 2);
368         break;
369      default:
370	      ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
371         assert(0);
372         break;
373      }
374   }
375   switch (mode) {
376   case 0x00: // rrr
377      break;
378   case 0x01: // arr/grr
379      if (progType == Program::TYPE_GEOMETRY) {
380         code[0] |= 0x01800000;
381         if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
382            code[1] |= 0x00200000;
383      } else {
384         if (enc == NV50_OP_ENC_SHORT)
385            code[0] |= 0x01000000;
386         else
387            code[1] |= 0x00200000;
388      }
389      break;
390   case 0x03: // irr
391      assert(i->op == OP_MOV);
392      return;
393   case 0x0c: // rir
394      break;
395   case 0x0d: // gir
396      code[0] |= 0x01000000;
397      assert(progType == Program::TYPE_GEOMETRY ||
398             progType == Program::TYPE_COMPUTE);
399      break;
400   case 0x08: // rcr
401      code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
402      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
403      break;
404   case 0x09: // acr/gcr
405      if (progType == Program::TYPE_GEOMETRY) {
406         code[0] |= 0x01800000;
407      } else {
408         code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
409         code[1] |= 0x00200000;
410      }
411      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
412      break;
413   case 0x20: // rrc
414      code[0] |= 0x01000000;
415      code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
416      break;
417   case 0x21: // arc
418      code[0] |= 0x01000000;
419      code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
420      assert(progType != Program::TYPE_GEOMETRY);
421      break;
422   default:
423      ERROR("not encodable: %x\n", mode);
424      assert(0);
425      break;
426   }
427   if (progType != Program::TYPE_COMPUTE)
428      return;
429
430   if ((mode & 3) == 1) {
431      const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
432
433      switch (i->getSrc(0)->reg.type) {
434      case TYPE_U8:
435         break;
436      case TYPE_U16:
437         code[0] |= 1 << pos;
438         break;
439      case TYPE_S16:
440         code[0] |= 2 << pos;
441         break;
442      default:
443         code[0] |= 3 << pos;
444         assert(i->getSrc(0)->reg.size == 4);
445         break;
446      }
447   }
448}
449
450void
451CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
452{
453   if (Target::operationSrcNr[i->op] <= s)
454      return;
455   const Storage *reg = &i->src(s).rep()->reg;
456
457   unsigned int id = (reg->file == FILE_GPR) ?
458      reg->data.id :
459      reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
460
461   switch (slot) {
462   case 0: code[0] |= id << 9; break;
463   case 1: code[0] |= id << 16; break;
464   case 2: code[1] |= id << 14; break;
465   default:
466      assert(0);
467      break;
468   }
469}
470
471// the default form:
472//  - long instruction
473//  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
474//  - address & flags
475void
476CodeEmitterNV50::emitForm_MAD(const Instruction *i)
477{
478   assert(i->encSize == 8);
479   code[0] |= 1;
480
481   emitFlagsRd(i);
482   emitFlagsWr(i);
483
484   setDst(i, 0);
485
486   setSrcFileBits(i, NV50_OP_ENC_LONG);
487   setSrc(i, 0, 0);
488   setSrc(i, 1, 1);
489   setSrc(i, 2, 2);
490
491   setAReg16(i, 1);
492}
493
494// like default form, but 2nd source in slot 2, and no 3rd source
495void
496CodeEmitterNV50::emitForm_ADD(const Instruction *i)
497{
498   assert(i->encSize == 8);
499   code[0] |= 1;
500
501   emitFlagsRd(i);
502   emitFlagsWr(i);
503
504   setDst(i, 0);
505
506   setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
507   setSrc(i, 0, 0);
508   setSrc(i, 1, 2);
509
510   setAReg16(i, 1);
511}
512
513// default short form (rr, ar, rc, gr)
514void
515CodeEmitterNV50::emitForm_MUL(const Instruction *i)
516{
517   assert(i->encSize == 4 && !(code[0] & 1));
518   assert(i->defExists(0));
519   assert(!i->getPredicate());
520
521   setDst(i, 0);
522
523   setSrcFileBits(i, NV50_OP_ENC_SHORT);
524   setSrc(i, 0, 0);
525   setSrc(i, 1, 1);
526}
527
528// usual immediate form
529// - 1 to 3 sources where last is immediate (rir, gir)
530// - no address or predicate possible
531void
532CodeEmitterNV50::emitForm_IMM(const Instruction *i)
533{
534   assert(i->encSize == 8);
535   code[0] |= 1;
536
537   assert(i->defExists(0) && i->srcExists(0));
538
539   setDst(i, 0);
540
541   setSrcFileBits(i, NV50_OP_ENC_IMM);
542   if (Target::operationSrcNr[i->op] > 1) {
543      setSrc(i, 0, 0);
544      setImmediate(i, 1);
545      setSrc(i, 2, 1);
546   } else {
547      setImmediate(i, 0);
548   }
549}
550
551void
552CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
553{
554   uint8_t enc;
555
556   switch (ty) {
557   case TYPE_F32: // fall through
558   case TYPE_S32: // fall through
559   case TYPE_U32:  enc = 0x6; break;
560   case TYPE_B128: enc = 0x5; break;
561   case TYPE_F64: // fall through
562   case TYPE_S64: // fall through
563   case TYPE_U64:  enc = 0x4; break;
564   case TYPE_S16:  enc = 0x3; break;
565   case TYPE_U16:  enc = 0x2; break;
566   case TYPE_S8:   enc = 0x1; break;
567   case TYPE_U8:   enc = 0x0; break;
568   default:
569      enc = 0;
570      assert(!"invalid load/store type");
571      break;
572   }
573   code[pos / 32] |= enc << (pos % 32);
574}
575
576void
577CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
578{
579   switch (ty) {
580   case TYPE_U8: break;
581   case TYPE_U16: code[1] |= 0x4000; break;
582   case TYPE_S16: code[1] |= 0x8000; break;
583   case TYPE_F32:
584   case TYPE_S32:
585   case TYPE_U32: code[1] |= 0xc000; break;
586   default:
587      assert(0);
588      break;
589   }
590}
591
592void
593CodeEmitterNV50::emitLOAD(const Instruction *i)
594{
595   DataFile sf = i->src(0).getFile();
596   int32_t offset = i->getSrc(0)->reg.data.offset;
597
598   switch (sf) {
599   case FILE_SHADER_INPUT:
600      // use 'mov' where we can
601      code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
602      code[1] = 0x00200000 | (i->lanes << 14);
603      if (typeSizeof(i->dType) == 4)
604         code[1] |= 0x04000000;
605      break;
606   case FILE_MEMORY_SHARED:
607      if (targ->getChipset() >= 0x84) {
608         assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
609         code[0] = 0x10000001;
610         code[1] = 0x40000000;
611
612         if (typeSizeof(i->dType) == 4)
613            code[1] |= 0x04000000;
614
615         emitLoadStoreSizeCS(i->sType);
616      } else {
617         assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
618         code[0] = 0x10000001;
619         code[1] = 0x00200000 | (i->lanes << 14);
620         emitLoadStoreSizeCS(i->sType);
621      }
622      break;
623   case FILE_MEMORY_CONST:
624      code[0] = 0x10000001;
625      code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
626      if (typeSizeof(i->dType) == 4)
627         code[1] |= 0x04000000;
628      emitLoadStoreSizeCS(i->sType);
629      break;
630   case FILE_MEMORY_LOCAL:
631      code[0] = 0xd0000001;
632      code[1] = 0x40000000;
633      break;
634   case FILE_MEMORY_GLOBAL:
635      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
636      code[1] = 0x80000000;
637      break;
638   default:
639      assert(!"invalid load source file");
640      break;
641   }
642   if (sf == FILE_MEMORY_LOCAL ||
643       sf == FILE_MEMORY_GLOBAL)
644      emitLoadStoreSizeLG(i->sType, 21 + 32);
645
646   setDst(i, 0);
647
648   emitFlagsRd(i);
649   emitFlagsWr(i);
650
651   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
652      srcId(*i->src(0).getIndirect(0), 9);
653   } else {
654      setAReg16(i, 0);
655      srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
656   }
657}
658
659void
660CodeEmitterNV50::emitSTORE(const Instruction *i)
661{
662   DataFile f = i->getSrc(0)->reg.file;
663   int32_t offset = i->getSrc(0)->reg.data.offset;
664
665   switch (f) {
666   case FILE_SHADER_OUTPUT:
667      code[0] = 0x00000001 | ((offset >> 2) << 9);
668      code[1] = 0x80c00000;
669      srcId(i->src(1), 32 + 14);
670      break;
671   case FILE_MEMORY_GLOBAL:
672      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
673      code[1] = 0xa0000000;
674      emitLoadStoreSizeLG(i->dType, 21 + 32);
675      srcId(i->src(1), 2);
676      break;
677   case FILE_MEMORY_LOCAL:
678      code[0] = 0xd0000001;
679      code[1] = 0x60000000;
680      emitLoadStoreSizeLG(i->dType, 21 + 32);
681      srcId(i->src(1), 2);
682      break;
683   case FILE_MEMORY_SHARED:
684      code[0] = 0x00000001;
685      code[1] = 0xe0000000;
686      switch (typeSizeof(i->dType)) {
687      case 1:
688         code[0] |= offset << 9;
689         code[1] |= 0x00400000;
690         break;
691      case 2:
692         code[0] |= (offset >> 1) << 9;
693         break;
694      case 4:
695         code[0] |= (offset >> 2) << 9;
696         code[1] |= 0x04200000;
697         break;
698      default:
699         assert(0);
700         break;
701      }
702      srcId(i->src(1), 32 + 14);
703      break;
704   default:
705      assert(!"invalid store destination file");
706      break;
707   }
708
709   if (f == FILE_MEMORY_GLOBAL)
710      srcId(*i->src(0).getIndirect(0), 9);
711   else
712      setAReg16(i, 0);
713
714   if (f == FILE_MEMORY_LOCAL)
715      srcAddr16(i->src(0), false, 9);
716
717   emitFlagsRd(i);
718}
719
720void
721CodeEmitterNV50::emitMOV(const Instruction *i)
722{
723   DataFile sf = i->getSrc(0)->reg.file;
724   DataFile df = i->getDef(0)->reg.file;
725
726   assert(sf == FILE_GPR || df == FILE_GPR);
727
728   if (sf == FILE_FLAGS) {
729      code[0] = 0x00000001;
730      code[1] = 0x20000000;
731      defId(i->def(0), 2);
732      srcId(i->src(0), 12);
733      emitFlagsRd(i);
734   } else
735   if (sf == FILE_ADDRESS) {
736      code[0] = 0x00000001;
737      code[1] = 0x40000000;
738      defId(i->def(0), 2);
739      setARegBits(SDATA(i->src(0)).id + 1);
740      emitFlagsRd(i);
741   } else
742   if (df == FILE_FLAGS) {
743      code[0] = 0x00000001;
744      code[1] = 0xa0000000;
745      defId(i->def(0), 4);
746      srcId(i->src(0), 9);
747      emitFlagsRd(i);
748   } else
749   if (sf == FILE_IMMEDIATE) {
750      code[0] = 0x10008001;
751      code[1] = 0x00000003;
752      emitForm_IMM(i);
753   } else {
754      if (i->encSize == 4) {
755         code[0] = 0x10008000;
756      } else {
757         code[0] = 0x10000001;
758         code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
759         code[1] |= (i->lanes << 14);
760         emitFlagsRd(i);
761      }
762      defId(i->def(0), 2);
763      srcId(i->src(0), 9);
764   }
765   if (df == FILE_SHADER_OUTPUT) {
766      assert(i->encSize == 8);
767      code[1] |= 0x8;
768   }
769}
770
771void
772CodeEmitterNV50::emitNOP()
773{
774   code[0] = 0xf0000001;
775   code[1] = 0xe0000000;
776}
777
778void
779CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
780{
781   code[0] = 0xc0000000 | (lane << 16);
782   code[1] = 0x80000000;
783
784   code[0] |= (quOp & 0x03) << 20;
785   code[1] |= (quOp & 0xfc) << 20;
786
787   emitForm_ADD(i);
788
789   if (!i->srcExists(1))
790      srcId(i->src(0), 32 + 14);
791}
792
793void
794CodeEmitterNV50::emitPFETCH(const Instruction *i)
795{
796   code[0] = 0x11800001;
797   code[1] = 0x04200000 | (0xf << 14);
798
799   defId(i->def(0), 2);
800   srcAddr8(i->src(0), 9);
801   setAReg16(i, 0);
802}
803
804void
805CodeEmitterNV50::emitINTERP(const Instruction *i)
806{
807   code[0] = 0x80000000;
808
809   defId(i->def(0), 2);
810   srcAddr8(i->src(0), 16);
811
812   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
813      code[0] |= 1 << 8;
814   } else {
815      if (i->op == OP_PINTERP) {
816         code[0] |= 1 << 25;
817         srcId(i->src(1), 9);
818      }
819      if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
820         code[0] |= 1 << 24;
821   }
822
823   if (i->encSize == 8) {
824      code[1] =
825         (code[0] & (3 << 24)) >> (24 - 16) |
826         (code[0] & (1 <<  8)) << (18 -  8);
827      code[0] &= ~0x03000100;
828      code[0] |= 1;
829      emitFlagsRd(i);
830   }
831}
832
833void
834CodeEmitterNV50::emitMINMAX(const Instruction *i)
835{
836   if (i->dType == TYPE_F64) {
837      code[0] = 0xe0000000;
838      code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
839   } else {
840      code[0] = 0x30000000;
841      code[1] = 0x80000000;
842      if (i->op == OP_MIN)
843         code[1] |= 0x20000000;
844
845      switch (i->dType) {
846      case TYPE_F32: code[0] |= 0x80000000; break;
847      case TYPE_S32: code[1] |= 0x8c000000; break;
848      case TYPE_U32: code[1] |= 0x84000000; break;
849      case TYPE_S16: code[1] |= 0x80000000; break;
850      case TYPE_U16: break;
851      default:
852         assert(0);
853         break;
854      }
855      code[1] |= i->src(0).mod.abs() << 20;
856      code[1] |= i->src(1).mod.abs() << 19;
857   }
858   emitForm_MAD(i);
859}
860
861void
862CodeEmitterNV50::emitFMAD(const Instruction *i)
863{
864   const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
865   const int neg_add = i->src(2).mod.neg();
866
867   code[0] = 0xe0000000;
868
869   if (i->encSize == 4) {
870      emitForm_MUL(i);
871      assert(!neg_mul && !neg_add);
872   } else {
873      code[1]  = neg_mul << 26;
874      code[1] |= neg_add << 27;
875      if (i->saturate)
876         code[1] |= 1 << 29;
877      emitForm_MAD(i);
878   }
879}
880
881void
882CodeEmitterNV50::emitFADD(const Instruction *i)
883{
884   const int neg0 = i->src(0).mod.neg();
885   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
886
887   code[0] = 0xb0000000;
888
889   assert(!(i->src(0).mod | i->src(1).mod).abs());
890
891   if (i->src(1).getFile() == FILE_IMMEDIATE) {
892      code[1] = 0;
893      emitForm_IMM(i);
894      code[0] |= neg0 << 15;
895      code[0] |= neg1 << 22;
896      if (i->saturate)
897         code[0] |= 1 << 8;
898   } else
899   if (i->encSize == 8) {
900      code[1] = 0;
901      emitForm_ADD(i);
902      code[1] |= neg0 << 26;
903      code[1] |= neg1 << 27;
904      if (i->saturate)
905         code[1] |= 1 << 29;
906   } else {
907      emitForm_MUL(i);
908      code[0] |= neg0 << 15;
909      code[0] |= neg1 << 22;
910      if (i->saturate)
911         code[0] |= 1 << 8;
912   }
913}
914
915void
916CodeEmitterNV50::emitUADD(const Instruction *i)
917{
918   const int neg0 = i->src(0).mod.neg();
919   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
920
921   code[0] = 0x20008000;
922
923   if (i->src(1).getFile() == FILE_IMMEDIATE) {
924      code[1] = 0;
925      emitForm_IMM(i);
926   } else
927   if (i->encSize == 8) {
928      code[0] = 0x20000000;
929      code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
930      emitForm_ADD(i);
931   } else {
932      emitForm_MUL(i);
933   }
934   assert(!(neg0 && neg1));
935   code[0] |= neg0 << 28;
936   code[0] |= neg1 << 22;
937
938   if (i->flagsSrc >= 0) {
939      // addc == sub | subr
940      assert(!(code[0] & 0x10400000) && !i->getPredicate());
941      code[0] |= 0x10400000;
942      srcId(i->src(i->flagsSrc), 32 + 12);
943   }
944}
945
946void
947CodeEmitterNV50::emitAADD(const Instruction *i)
948{
949   const int s = (i->op == OP_MOV) ? 0 : 1;
950
951   code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
952   code[1] = 0x20000000;
953
954   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
955
956   emitFlagsRd(i);
957
958   if (s && i->srcExists(0))
959      setARegBits(SDATA(i->src(0)).id + 1);
960}
961
962void
963CodeEmitterNV50::emitIMUL(const Instruction *i)
964{
965   code[0] = 0x40000000;
966
967   if (i->encSize == 8) {
968      code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
969      emitForm_MAD(i);
970   } else {
971      if (i->sType == TYPE_S16)
972         code[0] |= 0x8100;
973      emitForm_MUL(i);
974   }
975}
976
977void
978CodeEmitterNV50::emitFMUL(const Instruction *i)
979{
980   const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
981
982   code[0] = 0xc0000000;
983
984   if (i->src(1).getFile() == FILE_IMMEDIATE) {
985      code[1] = 0;
986      emitForm_IMM(i);
987      if (neg)
988         code[0] |= 0x8000;
989   } else
990   if (i->encSize == 8) {
991      code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
992      if (neg)
993         code[1] |= 0x08000000;
994      emitForm_MAD(i);
995   } else {
996      emitForm_MUL(i);
997      if (neg)
998         code[0] |= 0x8000;
999   }
1000}
1001
1002void
1003CodeEmitterNV50::emitIMAD(const Instruction *i)
1004{
1005   code[0] = 0x60000000;
1006   if (isSignedType(i->sType))
1007      code[1] = i->saturate ? 0x40000000 : 0x20000000;
1008   else
1009      code[1] = 0x00000000;
1010
1011   int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1012   int neg2 = i->src(2).mod.neg();
1013
1014   assert(!(neg1 & neg2));
1015   code[1] |= neg1 << 27;
1016   code[1] |= neg2 << 26;
1017
1018   emitForm_MAD(i);
1019
1020   if (i->flagsSrc >= 0) {
1021      // add with carry from $cX
1022      assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1023      code[1] |= 0xc << 24;
1024      srcId(i->src(i->flagsSrc), 32 + 12);
1025   }
1026}
1027
1028void
1029CodeEmitterNV50::emitISAD(const Instruction *i)
1030{
1031   if (i->encSize == 8) {
1032      code[0] = 0x50000000;
1033      switch (i->sType) {
1034      case TYPE_U32: code[1] = 0x04000000; break;
1035      case TYPE_S32: code[1] = 0x0c000000; break;
1036      case TYPE_U16: code[1] = 0x00000000; break;
1037      case TYPE_S16: code[1] = 0x08000000; break;
1038      default:
1039         assert(0);
1040         break;
1041      }
1042      emitForm_MAD(i);
1043   } else {
1044      switch (i->sType) {
1045      case TYPE_U32: code[0] = 0x50008000; break;
1046      case TYPE_S32: code[0] = 0x50008100; break;
1047      case TYPE_U16: code[0] = 0x50000000; break;
1048      case TYPE_S16: code[0] = 0x50000100; break;
1049      default:
1050         assert(0);
1051         break;
1052      }
1053      emitForm_MUL(i);
1054   }
1055}
1056
1057void
1058CodeEmitterNV50::emitSET(const Instruction *i)
1059{
1060   code[0] = 0x30000000;
1061   code[1] = 0x60000000;
1062
1063   emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1064
1065   switch (i->sType) {
1066   case TYPE_F32: code[0] |= 0x80000000; break;
1067   case TYPE_S32: code[1] |= 0x0c000000; break;
1068   case TYPE_U32: code[1] |= 0x04000000; break;
1069   case TYPE_S16: code[1] |= 0x08000000; break;
1070   case TYPE_U16: break;
1071   default:
1072      assert(0);
1073      break;
1074   }
1075   if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1076   if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1077   if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1078   if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1079
1080   emitForm_MAD(i);
1081}
1082
1083void
1084CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1085{
1086   switch (rnd) {
1087   case ROUND_NI: code[1] |= 0x08000000; break;
1088   case ROUND_M:  code[1] |= 0x00020000; break;
1089   case ROUND_MI: code[1] |= 0x08020000; break;
1090   case ROUND_P:  code[1] |= 0x00040000; break;
1091   case ROUND_PI: code[1] |= 0x08040000; break;
1092   case ROUND_Z:  code[1] |= 0x00060000; break;
1093   case ROUND_ZI: code[1] |= 0x08060000; break;
1094   default:
1095      assert(rnd == ROUND_N);
1096      break;
1097   }
1098}
1099
1100void
1101CodeEmitterNV50::emitCVT(const Instruction *i)
1102{
1103   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1104   RoundMode rnd;
1105
1106   switch (i->op) {
1107   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1108   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1109   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1110   default:
1111      rnd = i->rnd;
1112      break;
1113   }
1114
1115   code[0] = 0xa0000000;
1116
1117   switch (i->dType) {
1118   case TYPE_F64:
1119      switch (i->sType) {
1120      case TYPE_F64: code[1] = 0xc4404000; break;
1121      case TYPE_S64: code[1] = 0x44414000; break;
1122      case TYPE_U64: code[1] = 0x44404000; break;
1123      case TYPE_F32: code[1] = 0xc4400000; break;
1124      case TYPE_S32: code[1] = 0x44410000; break;
1125      case TYPE_U32: code[1] = 0x44400000; break;
1126      default:
1127         assert(0);
1128         break;
1129      }
1130      break;
1131   case TYPE_S64:
1132      switch (i->sType) {
1133      case TYPE_F64: code[1] = 0x8c404000; break;
1134      case TYPE_F32: code[1] = 0x8c400000; break;
1135      default:
1136         assert(0);
1137         break;
1138      }
1139      break;
1140   case TYPE_U64:
1141      switch (i->sType) {
1142      case TYPE_F64: code[1] = 0x84404000; break;
1143      case TYPE_F32: code[1] = 0x84400000; break;
1144      default:
1145         assert(0);
1146         break;
1147      }
1148      break;
1149   case TYPE_F32:
1150      switch (i->sType) {
1151      case TYPE_F64: code[1] = 0xc0404000; break;
1152      case TYPE_S64: code[1] = 0x40414000; break;
1153      case TYPE_U64: code[1] = 0x40404000; break;
1154      case TYPE_F32: code[1] = 0xc4004000; break;
1155      case TYPE_S32: code[1] = 0x44014000; break;
1156      case TYPE_U32: code[1] = 0x44004000; break;
1157      case TYPE_F16: code[1] = 0xc4000000; break;
1158      default:
1159         assert(0);
1160         break;
1161      }
1162      break;
1163   case TYPE_S32:
1164      switch (i->sType) {
1165      case TYPE_F64: code[1] = 0x88404000; break;
1166      case TYPE_F32: code[1] = 0x8c004000; break;
1167      case TYPE_S32: code[1] = 0x0c014000; break;
1168      case TYPE_U32: code[1] = 0x0c004000; break;
1169      case TYPE_F16: code[1] = 0x8c000000; break;
1170      case TYPE_S16: code[1] = 0x0c010000; break;
1171      case TYPE_U16: code[1] = 0x0c000000; break;
1172      case TYPE_S8:  code[1] = 0x0c018000; break;
1173      case TYPE_U8:  code[1] = 0x0c008000; break;
1174      default:
1175         assert(0);
1176         break;
1177      }
1178      break;
1179   case TYPE_U32:
1180      switch (i->sType) {
1181      case TYPE_F64: code[1] = 0x80404000; break;
1182      case TYPE_F32: code[1] = 0x84004000; break;
1183      case TYPE_S32: code[1] = 0x04014000; break;
1184      case TYPE_U32: code[1] = 0x04004000; break;
1185      case TYPE_F16: code[1] = 0x84000000; break;
1186      case TYPE_S16: code[1] = 0x04010000; break;
1187      case TYPE_U16: code[1] = 0x04000000; break;
1188      case TYPE_S8:  code[1] = 0x04018000; break;
1189      case TYPE_U8:  code[1] = 0x04008000; break;
1190      default:
1191         assert(0);
1192         break;
1193      }
1194      break;
1195   case TYPE_S16:
1196   case TYPE_U16:
1197   case TYPE_S8:
1198   case TYPE_U8:
1199   default:
1200      assert(0);
1201      break;
1202   }
1203   if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1204      code[1] |= 0x00004000;
1205
1206   roundMode_CVT(rnd);
1207
1208   switch (i->op) {
1209   case OP_ABS: code[1] |= 1 << 20; break;
1210   case OP_SAT: code[1] |= 1 << 19; break;
1211   case OP_NEG: code[1] |= 1 << 29; break;
1212   default:
1213      break;
1214   }
1215   code[1] ^= i->src(0).mod.neg() << 29;
1216   code[1] |= i->src(0).mod.abs() << 20;
1217   if (i->saturate)
1218      code[1] |= 1 << 19;
1219
1220   assert(i->op != OP_ABS || !i->src(0).mod.neg());
1221
1222   emitForm_MAD(i);
1223}
1224
1225void
1226CodeEmitterNV50::emitPreOp(const Instruction *i)
1227{
1228   code[0] = 0xb0000000;
1229   code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1230
1231   code[1] |= i->src(0).mod.abs() << 20;
1232   code[1] |= i->src(0).mod.neg() << 26;
1233
1234   emitForm_MAD(i);
1235}
1236
1237void
1238CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1239{
1240   code[0] = 0x90000000;
1241
1242   if (i->encSize == 4) {
1243      assert(i->op == OP_RCP);
1244      code[0] |= i->src(0).mod.abs() << 15;
1245      code[0] |= i->src(0).mod.neg() << 22;
1246      emitForm_MUL(i);
1247   } else {
1248      code[1] = subOp << 29;
1249      code[1] |= i->src(0).mod.abs() << 20;
1250      code[1] |= i->src(0).mod.neg() << 26;
1251      emitForm_MAD(i);
1252   }
1253}
1254
1255void
1256CodeEmitterNV50::emitNOT(const Instruction *i)
1257{
1258   code[0] = 0xd0000000;
1259   code[1] = 0x0002c000;
1260
1261   switch (i->sType) {
1262   case TYPE_U32:
1263   case TYPE_S32:
1264      code[1] |= 0x04000000;
1265      break;
1266   default:
1267      break;
1268   }
1269   emitForm_MAD(i);
1270   setSrc(i, 0, 1);
1271}
1272
1273void
1274CodeEmitterNV50::emitLogicOp(const Instruction *i)
1275{
1276   code[0] = 0xd0000000;
1277   code[1] = 0;
1278
1279   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1280      switch (i->op) {
1281      case OP_OR:  code[0] |= 0x0100; break;
1282      case OP_XOR: code[0] |= 0x8000; break;
1283      default:
1284         assert(i->op == OP_AND);
1285         break;
1286      }
1287      emitForm_IMM(i);
1288   } else {
1289      switch (i->op) {
1290      case OP_AND: code[1] = 0x04000000; break;
1291      case OP_OR:  code[1] = 0x04004000; break;
1292      case OP_XOR: code[1] = 0x04008000; break;
1293      default:
1294         assert(0);
1295         break;
1296      }
1297      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1298         code[1] |= 1 << 16;
1299      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1300         code[1] |= 1 << 17;
1301
1302      emitForm_MAD(i);
1303   }
1304}
1305
1306void
1307CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1308{
1309   code[0] = 0x00000001 | (shl << 16);
1310   code[1] = 0xc0000000;
1311
1312   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1313
1314   setSrcFileBits(i, NV50_OP_ENC_IMM);
1315   setSrc(i, 0, 0);
1316   emitFlagsRd(i);
1317}
1318
1319void
1320CodeEmitterNV50::emitShift(const Instruction *i)
1321{
1322   if (i->def(0).getFile() == FILE_ADDRESS) {
1323      assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1324      emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1325   } else {
1326      code[0] = 0x30000001;
1327      code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1328      if (i->op == OP_SHR && isSignedType(i->sType))
1329          code[1] |= 1 << 27;
1330
1331      if (i->src(1).getFile() == FILE_IMMEDIATE) {
1332         code[1] |= 1 << 20;
1333         code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1334         defId(i->def(0), 2);
1335         srcId(i->src(0), 9);
1336         emitFlagsRd(i);
1337      } else {
1338         emitForm_MAD(i);
1339      }
1340   }
1341}
1342
1343void
1344CodeEmitterNV50::emitOUT(const Instruction *i)
1345{
1346   code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
1347   code[1] = 0xc0000001;
1348
1349   emitFlagsRd(i);
1350}
1351
1352void
1353CodeEmitterNV50::emitTEX(const TexInstruction *i)
1354{
1355   code[0] = 0xf0000001;
1356   code[1] = 0x00000000;
1357
1358   switch (i->op) {
1359   case OP_TXB:
1360      code[1] = 0x20000000;
1361      break;
1362   case OP_TXL:
1363      code[1] = 0x40000000;
1364      break;
1365   case OP_TXF:
1366      code[0] |= 0x01000000;
1367      break;
1368   case OP_TXG:
1369      code[0] = 0x01000000;
1370      code[1] = 0x80000000;
1371      break;
1372   default:
1373      assert(i->op == OP_TEX);
1374      break;
1375   }
1376
1377   code[0] |= i->tex.r << 9;
1378   code[0] |= i->tex.s << 17;
1379
1380   int argc = i->tex.target.getArgCount();
1381
1382   if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1383      argc += 1;
1384   if (i->tex.target.isShadow())
1385      argc += 1;
1386   assert(argc <= 4);
1387
1388   code[0] |= (argc - 1) << 22;
1389
1390   if (i->tex.target.isCube()) {
1391      code[0] |= 0x08000000;
1392   } else
1393   if (i->tex.useOffsets) {
1394      code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
1395      code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
1396      code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
1397   }
1398
1399   code[0] |= (i->tex.mask & 0x3) << 25;
1400   code[1] |= (i->tex.mask & 0xc) << 12;
1401
1402   if (i->tex.liveOnly)
1403      code[1] |= 4;
1404
1405   defId(i->def(0), 2);
1406
1407   emitFlagsRd(i);
1408}
1409
1410void
1411CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1412{
1413   assert(i->tex.query == TXQ_DIMS);
1414
1415   code[0] = 0xf0000001;
1416   code[1] = 0x60000000;
1417
1418   code[0] |= i->tex.r << 9;
1419   code[0] |= i->tex.s << 17;
1420
1421   code[0] |= (i->tex.mask & 0x3) << 25;
1422   code[1] |= (i->tex.mask & 0xc) << 12;
1423
1424   defId(i->def(0), 2);
1425
1426   emitFlagsRd(i);
1427}
1428
1429void
1430CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1431{
1432   uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1433
1434   code[0] = 0x10000003; // bra
1435   code[1] = 0x00000780; // always
1436
1437   switch (i->subOp) {
1438   case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1439      break;
1440   case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1441      pos += 8;
1442      break;
1443   default:
1444      assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1445      code[0] = 0x20000003; // call
1446      code[1] = 0x00000000; // no predicate
1447      break;
1448   }
1449   addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1450   addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1451}
1452
1453void
1454CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1455{
1456   const FlowInstruction *f = i->asFlow();
1457   bool hasPred = false;
1458   bool hasTarg = false;
1459
1460   code[0] = 0x00000003 | (flowOp << 28);
1461   code[1] = 0x00000000;
1462
1463   switch (i->op) {
1464   case OP_BRA:
1465      hasPred = true;
1466      hasTarg = true;
1467      break;
1468   case OP_BREAK:
1469   case OP_BRKPT:
1470   case OP_DISCARD:
1471   case OP_RET:
1472      hasPred = true;
1473      break;
1474   case OP_CALL:
1475   case OP_PREBREAK:
1476   case OP_JOINAT:
1477      hasTarg = true;
1478      break;
1479   case OP_PRERET:
1480      hasTarg = true;
1481      if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1482         emitPRERETEmu(f);
1483         return;
1484      }
1485      break;
1486   default:
1487      break;
1488   }
1489
1490   if (hasPred)
1491      emitFlagsRd(i);
1492
1493   if (hasTarg && f) {
1494      uint32_t pos;
1495
1496      if (f->op == OP_CALL) {
1497         if (f->builtin) {
1498            pos = targ->getBuiltinOffset(f->target.builtin);
1499         } else {
1500            pos = f->target.fn->binPos;
1501         }
1502      } else {
1503         pos = f->target.bb->binPos;
1504      }
1505
1506      code[0] |= ((pos >>  2) & 0xffff) << 11;
1507      code[1] |= ((pos >> 18) & 0x003f) << 14;
1508
1509      RelocEntry::Type relocTy;
1510
1511      relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1512
1513      addReloc(relocTy, 0, pos, 0x07fff800, 9);
1514      addReloc(relocTy, 1, pos, 0x000fc000, -4);
1515   }
1516}
1517
1518bool
1519CodeEmitterNV50::emitInstruction(Instruction *insn)
1520{
1521   if (!insn->encSize) {
1522      ERROR("skipping unencodable instruction: "); insn->print();
1523      return false;
1524   } else
1525   if (codeSize + insn->encSize > codeSizeLimit) {
1526      ERROR("code emitter output buffer too small\n");
1527      return false;
1528   }
1529
1530   if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1531      INFO("EMIT: "); insn->print();
1532   }
1533
1534   switch (insn->op) {
1535   case OP_MOV:
1536      emitMOV(insn);
1537      break;
1538   case OP_EXIT:
1539   case OP_NOP:
1540   case OP_JOIN:
1541      emitNOP();
1542      break;
1543   case OP_VFETCH:
1544   case OP_LOAD:
1545      emitLOAD(insn);
1546      break;
1547   case OP_EXPORT:
1548   case OP_STORE:
1549      emitSTORE(insn);
1550      break;
1551   case OP_PFETCH:
1552      emitPFETCH(insn);
1553      break;
1554   case OP_LINTERP:
1555   case OP_PINTERP:
1556      emitINTERP(insn);
1557      break;
1558   case OP_ADD:
1559   case OP_SUB:
1560      if (isFloatType(insn->dType))
1561         emitFADD(insn);
1562      else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1563         emitAADD(insn);
1564      else
1565         emitUADD(insn);
1566      break;
1567   case OP_MUL:
1568      if (isFloatType(insn->dType))
1569         emitFMUL(insn);
1570      else
1571         emitIMUL(insn);
1572      break;
1573   case OP_MAD:
1574   case OP_FMA:
1575      if (isFloatType(insn->dType))
1576         emitFMAD(insn);
1577      else
1578         emitIMAD(insn);
1579      break;
1580   case OP_SAD:
1581      emitISAD(insn);
1582      break;
1583   case OP_NOT:
1584      emitNOT(insn);
1585      break;
1586   case OP_AND:
1587   case OP_OR:
1588   case OP_XOR:
1589      emitLogicOp(insn);
1590      break;
1591   case OP_SHL:
1592   case OP_SHR:
1593      emitShift(insn);
1594      break;
1595   case OP_SET:
1596      emitSET(insn);
1597      break;
1598   case OP_MIN:
1599   case OP_MAX:
1600      emitMINMAX(insn);
1601      break;
1602   case OP_CEIL:
1603   case OP_FLOOR:
1604   case OP_TRUNC:
1605   case OP_ABS:
1606   case OP_NEG:
1607   case OP_SAT:
1608      emitCVT(insn);
1609      break;
1610   case OP_CVT:
1611      if (insn->def(0).getFile() == FILE_ADDRESS)
1612         emitARL(insn, 0);
1613      else
1614      if (insn->def(0).getFile() == FILE_FLAGS ||
1615          insn->src(0).getFile() == FILE_FLAGS ||
1616          insn->src(0).getFile() == FILE_ADDRESS)
1617         emitMOV(insn);
1618      else
1619         emitCVT(insn);
1620      break;
1621   case OP_RCP:
1622      emitSFnOp(insn, 0);
1623      break;
1624   case OP_RSQ:
1625      emitSFnOp(insn, 2);
1626      break;
1627   case OP_LG2:
1628      emitSFnOp(insn, 3);
1629      break;
1630   case OP_SIN:
1631      emitSFnOp(insn, 4);
1632      break;
1633   case OP_COS:
1634      emitSFnOp(insn, 5);
1635      break;
1636   case OP_EX2:
1637      emitSFnOp(insn, 6);
1638      break;
1639   case OP_PRESIN:
1640   case OP_PREEX2:
1641      emitPreOp(insn);
1642      break;
1643   case OP_TEX:
1644   case OP_TXB:
1645   case OP_TXL:
1646   case OP_TXF:
1647      emitTEX(insn->asTex());
1648      break;
1649   case OP_TXQ:
1650      emitTXQ(insn->asTex());
1651      break;
1652   case OP_EMIT:
1653   case OP_RESTART:
1654      emitOUT(insn);
1655      break;
1656   case OP_DISCARD:
1657      emitFlow(insn, 0x0);
1658      break;
1659   case OP_BRA:
1660      emitFlow(insn, 0x1);
1661      break;
1662   case OP_CALL:
1663      emitFlow(insn, 0x2);
1664      break;
1665   case OP_RET:
1666      emitFlow(insn, 0x3);
1667      break;
1668   case OP_PREBREAK:
1669      emitFlow(insn, 0x4);
1670      break;
1671   case OP_BREAK:
1672      emitFlow(insn, 0x5);
1673      break;
1674   case OP_QUADON:
1675      emitFlow(insn, 0x6);
1676      break;
1677   case OP_QUADPOP:
1678      emitFlow(insn, 0x7);
1679      break;
1680   case OP_JOINAT:
1681      emitFlow(insn, 0xa);
1682      break;
1683   case OP_PRERET:
1684      emitFlow(insn, 0xd);
1685      break;
1686   case OP_QUADOP:
1687      emitQUADOP(insn, insn->lanes, insn->subOp);
1688      break;
1689   case OP_DFDX:
1690      emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1691      break;
1692   case OP_DFDY:
1693      emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
1694      break;
1695   case OP_PHI:
1696   case OP_UNION:
1697   case OP_CONSTRAINT:
1698      ERROR("operation should have been eliminated\n");
1699      return false;
1700   case OP_EXP:
1701   case OP_LOG:
1702   case OP_SQRT:
1703   case OP_POW:
1704   case OP_SELP:
1705   case OP_SLCT:
1706   case OP_TXD:
1707   case OP_PRECONT:
1708   case OP_CONT:
1709   case OP_POPCNT:
1710   case OP_INSBF:
1711   case OP_EXTBF:
1712      ERROR("operation should have been lowered\n");
1713      return false;
1714   default:
1715      ERROR("unknown op: %u\n", insn->op);
1716      return false;
1717   }
1718   if (insn->join || insn->op == OP_JOIN)
1719      code[1] |= 0x2;
1720   else
1721   if (insn->exit || insn->op == OP_EXIT)
1722      code[1] |= 0x1;
1723
1724   assert((insn->encSize == 8) == (code[0] & 1));
1725
1726   code += insn->encSize / 4;
1727   codeSize += insn->encSize;
1728   return true;
1729}
1730
1731uint32_t
1732CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
1733{
1734   const Target::OpInfo &info = targ->getOpInfo(i);
1735
1736   if (info.minEncSize > 4)
1737      return 8;
1738
1739   // check constraints on dst and src operands
1740   for (int d = 0; i->defExists(d); ++d) {
1741      if (i->def(d).rep()->reg.data.id > 63 ||
1742          i->def(d).rep()->reg.file != FILE_GPR)
1743         return 8;
1744   }
1745
1746   for (int s = 0; i->srcExists(s); ++s) {
1747      DataFile sf = i->src(s).getFile();
1748      if (sf != FILE_GPR)
1749         if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
1750            return 8;
1751      if (i->src(s).rep()->reg.data.id > 63)
1752         return 8;
1753   }
1754
1755   // check modifiers & rounding
1756   if (i->join || i->lanes != 0xf || i->exit)
1757      return 8;
1758   if (i->op == OP_MUL && i->rnd != ROUND_N)
1759      return 8;
1760
1761   if (i->asTex())
1762      return 8; // TODO: short tex encoding
1763
1764   // check constraints on short MAD
1765   if (info.srcNr >= 2 && i->srcExists(2)) {
1766      if (i->saturate || i->src(2).mod)
1767         return 8;
1768      if ((i->src(0).mod ^ i->src(1).mod) ||
1769          (i->src(0).mod | i->src(1).mod).abs())
1770         return 8;
1771      if (!i->defExists(0) ||
1772          i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
1773         return 8;
1774   }
1775
1776   return info.minEncSize;
1777}
1778
1779// Change the encoding size of an instruction after BBs have been scheduled.
1780static void
1781makeInstructionLong(Instruction *insn)
1782{
1783   if (insn->encSize == 8)
1784      return;
1785   Function *fn = insn->bb->getFunction();
1786   int n = 0;
1787   int adj = 4;
1788
1789   for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
1790
1791   if (n & 1) {
1792      adj = 8;
1793      insn->next->encSize = 8;
1794   } else
1795   if (insn->prev && insn->prev->encSize == 4) {
1796      adj = 8;
1797      insn->prev->encSize = 8;
1798   }
1799   insn->encSize = 8;
1800
1801   for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
1802      fn->bbArray[i]->binPos += 4;
1803   }
1804   fn->binSize += adj;
1805   insn->bb->binSize += adj;
1806}
1807
1808static bool
1809trySetExitModifier(Instruction *insn)
1810{
1811   if (insn->op == OP_DISCARD ||
1812       insn->op == OP_QUADON ||
1813       insn->op == OP_QUADPOP)
1814      return false;
1815   for (int s = 0; insn->srcExists(s); ++s)
1816      if (insn->src(s).getFile() == FILE_IMMEDIATE)
1817         return false;
1818   if (insn->asFlow()) {
1819      if (insn->op == OP_CALL) // side effects !
1820         return false;
1821      if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
1822         return false;
1823      insn->op = OP_EXIT;
1824   }
1825   insn->exit = 1;
1826   makeInstructionLong(insn);
1827   return true;
1828}
1829
1830static void
1831replaceExitWithModifier(Function *func)
1832{
1833   BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
1834
1835   if (!epilogue->getExit() ||
1836       epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
1837      return;
1838
1839   if (epilogue->getEntry()->op != OP_EXIT) {
1840      Instruction *insn = epilogue->getExit()->prev;
1841      if (!insn || !trySetExitModifier(insn))
1842         return;
1843      insn->exit = 1;
1844   } else {
1845      for (Graph::EdgeIterator ei = func->cfgExit->incident();
1846           !ei.end(); ei.next()) {
1847         BasicBlock *bb = BasicBlock::get(ei.getNode());
1848         Instruction *i = bb->getExit();
1849
1850         if (!i || !trySetExitModifier(i))
1851            return;
1852      }
1853   }
1854   epilogue->binSize -= 8;
1855   func->binSize -= 8;
1856   delete_Instruction(func->getProgram(), epilogue->getExit());
1857}
1858
1859void
1860CodeEmitterNV50::prepareEmission(Function *func)
1861{
1862   CodeEmitter::prepareEmission(func);
1863
1864   replaceExitWithModifier(func);
1865}
1866
1867CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : CodeEmitter(target)
1868{
1869   targ = target; // specialized
1870   code = NULL;
1871   codeSize = codeSizeLimit = 0;
1872   relocInfo = NULL;
1873}
1874
1875CodeEmitter *
1876TargetNV50::getCodeEmitter(Program::Type type)
1877{
1878   CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
1879   emit->setProgramType(type);
1880   return emit;
1881}
1882
1883} // namespace nv50_ir
1884