1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "codegen/nv50_ir.h"
24#include "codegen/nv50_ir_target_nv50.h"
25
26namespace nv50_ir {
27
// Encoding forms passed as the 'enc' argument of setSrcFileBits():
//  LONG     - used by emitForm_MAD
//  SHORT    - used by emitForm_MUL
//  IMM      - used by emitForm_IMM
//  LONG_ALT - used by emitForm_ADD
// An unnamed enum keeps the names inside the namespace while still
// converting implicitly to the int parameters that receive them.
enum
{
   NV50_OP_ENC_LONG     = 0,
   NV50_OP_ENC_SHORT    = 1,
   NV50_OP_ENC_IMM      = 2,
   NV50_OP_ENC_LONG_ALT = 3
};
32
33class CodeEmitterNV50 : public CodeEmitter
34{
35public:
36   CodeEmitterNV50(const TargetNV50 *);
37
38   virtual bool emitInstruction(Instruction *);
39
40   virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42   inline void setProgramType(Program::Type pType) { progType = pType; }
43
44   virtual void prepareEmission(Function *);
45
46private:
47   Program::Type progType;
48
49   const TargetNV50 *targNV50;
50
51private:
52   inline void defId(const ValueDef&, const int pos);
53   inline void srcId(const ValueRef&, const int pos);
54   inline void srcId(const ValueRef *, const int pos);
55
56   inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57   inline void srcAddr8(const ValueRef&, const int pos);
58
59   void emitFlagsRd(const Instruction *);
60   void emitFlagsWr(const Instruction *);
61
62   void emitCondCode(CondCode cc, DataType ty, int pos);
63
64   inline void setARegBits(unsigned int);
65
66   void setAReg16(const Instruction *, int s);
67   void setImmediate(const Instruction *, int s);
68
69   void setDst(const Value *);
70   void setDst(const Instruction *, int d);
71   void setSrcFileBits(const Instruction *, int enc);
72   void setSrc(const Instruction *, unsigned int s, int slot);
73
74   void emitForm_MAD(const Instruction *);
75   void emitForm_ADD(const Instruction *);
76   void emitForm_MUL(const Instruction *);
77   void emitForm_IMM(const Instruction *);
78
79   void emitLoadStoreSizeLG(DataType ty, int pos);
80   void emitLoadStoreSizeCS(DataType ty);
81
82   void roundMode_MAD(const Instruction *);
83   void roundMode_CVT(RoundMode);
84
85   void emitMNeg12(const Instruction *);
86
87   void emitLOAD(const Instruction *);
88   void emitSTORE(const Instruction *);
89   void emitMOV(const Instruction *);
90   void emitRDSV(const Instruction *);
91   void emitNOP();
92   void emitINTERP(const Instruction *);
93   void emitPFETCH(const Instruction *);
94   void emitOUT(const Instruction *);
95
96   void emitUADD(const Instruction *);
97   void emitAADD(const Instruction *);
98   void emitFADD(const Instruction *);
99   void emitDADD(const Instruction *);
100   void emitIMUL(const Instruction *);
101   void emitFMUL(const Instruction *);
102   void emitDMUL(const Instruction *);
103   void emitFMAD(const Instruction *);
104   void emitDMAD(const Instruction *);
105   void emitIMAD(const Instruction *);
106   void emitISAD(const Instruction *);
107
108   void emitMINMAX(const Instruction *);
109
110   void emitPreOp(const Instruction *);
111   void emitSFnOp(const Instruction *, uint8_t subOp);
112
113   void emitShift(const Instruction *);
114   void emitARL(const Instruction *, unsigned int shl);
115   void emitLogicOp(const Instruction *);
116   void emitNOT(const Instruction *);
117
118   void emitCVT(const Instruction *);
119   void emitSET(const Instruction *);
120
121   void emitTEX(const TexInstruction *);
122   void emitTXQ(const TexInstruction *);
123   void emitTEXPREP(const TexInstruction *);
124
125   void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
126
127   void emitFlow(const Instruction *, uint8_t flowOp);
128   void emitPRERETEmu(const FlowInstruction *);
129   void emitBAR(const Instruction *);
130
131   void emitATOM(const Instruction *);
132};
133
// Shorthands for the register data of a source (SDATA) or definition
// (DDATA) reference, taken from the value returned by rep().
#define SDATA(ref) ((ref).rep()->reg.data)
#define DDATA(ref) ((ref).rep()->reg.data)
136
137void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
138{
139   assert(src.get());
140   code[pos / 32] |= SDATA(src).id << (pos % 32);
141}
142
143void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
144{
145   assert(src->get());
146   code[pos / 32] |= SDATA(*src).id << (pos % 32);
147}
148
149void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
150{
151   assert(src.get());
152
153   int32_t offset = SDATA(src).offset;
154
155   assert(!adj || src.get()->reg.size <= 4);
156   if (adj)
157      offset /= src.get()->reg.size;
158
159   assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
160
161   if (offset < 0)
162      offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
163
164   code[pos / 32] |= offset << (pos % 32);
165}
166
167void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
168{
169   assert(src.get());
170
171   uint32_t offset = SDATA(src).offset;
172
173   assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
174
175   code[pos / 32] |= (offset >> 2) << (pos % 32);
176}
177
178void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
179{
180   assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
181
182   code[pos / 32] |= DDATA(def).id << (pos % 32);
183}
184
185void
186CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
187{
188   switch (insn->rnd) {
189   case ROUND_M: code[1] |= 1 << 22; break;
190   case ROUND_P: code[1] |= 2 << 22; break;
191   case ROUND_Z: code[1] |= 3 << 22; break;
192   default:
193      assert(insn->rnd == ROUND_N);
194      break;
195   }
196}
197
198void
199CodeEmitterNV50::emitMNeg12(const Instruction *i)
200{
201   code[1] |= i->src(0).mod.neg() << 26;
202   code[1] |= i->src(1).mod.neg() << 27;
203}
204
205void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
206{
207   uint8_t enc;
208
209   assert(pos >= 32 || pos <= 27);
210
211   switch (cc) {
212   case CC_LT:  enc = 0x1; break;
213   case CC_LTU: enc = 0x9; break;
214   case CC_EQ:  enc = 0x2; break;
215   case CC_EQU: enc = 0xa; break;
216   case CC_LE:  enc = 0x3; break;
217   case CC_LEU: enc = 0xb; break;
218   case CC_GT:  enc = 0x4; break;
219   case CC_GTU: enc = 0xc; break;
220   case CC_NE:  enc = 0x5; break;
221   case CC_NEU: enc = 0xd; break;
222   case CC_GE:  enc = 0x6; break;
223   case CC_GEU: enc = 0xe; break;
224   case CC_TR:  enc = 0xf; break;
225   case CC_FL:  enc = 0x0; break;
226
227   case CC_O:  enc = 0x10; break;
228   case CC_C:  enc = 0x11; break;
229   case CC_A:  enc = 0x12; break;
230   case CC_S:  enc = 0x13; break;
231   case CC_NS: enc = 0x1c; break;
232   case CC_NA: enc = 0x1d; break;
233   case CC_NC: enc = 0x1e; break;
234   case CC_NO: enc = 0x1f; break;
235
236   default:
237      enc = 0;
238      assert(!"invalid condition code");
239      break;
240   }
241   if (ty != TYPE_NONE && !isFloatType(ty))
242      enc &= ~0x8; // unordered only exists for float types
243
244   code[pos / 32] |= enc << (pos % 32);
245}
246
247void
248CodeEmitterNV50::emitFlagsRd(const Instruction *i)
249{
250   int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
251
252   assert(!(code[1] & 0x00003f80));
253
254   if (s >= 0) {
255      assert(i->getSrc(s)->reg.file == FILE_FLAGS);
256      emitCondCode(i->cc, TYPE_NONE, 32 + 7);
257      srcId(i->src(s), 32 + 12);
258   } else {
259      code[1] |= 0x0780;
260   }
261}
262
263void
264CodeEmitterNV50::emitFlagsWr(const Instruction *i)
265{
266   assert(!(code[1] & 0x70));
267
268   int flagsDef = i->flagsDef;
269
270   // find flags definition and check that it is the last def
271   if (flagsDef < 0) {
272      for (int d = 0; i->defExists(d); ++d)
273         if (i->def(d).getFile() == FILE_FLAGS)
274            flagsDef = d;
275      if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
276         WARN("Instruction::flagsDef was not set properly\n");
277   }
278   if (flagsDef == 0 && i->defExists(1))
279      WARN("flags def should not be the primary definition\n");
280
281   if (flagsDef >= 0)
282      code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
283
284}
285
286void
287CodeEmitterNV50::setARegBits(unsigned int u)
288{
289   code[0] |= (u & 3) << 26;
290   code[1] |= (u & 4);
291}
292
293void
294CodeEmitterNV50::setAReg16(const Instruction *i, int s)
295{
296   if (i->srcExists(s)) {
297      s = i->src(s).indirect[0];
298      if (s >= 0)
299         setARegBits(SDATA(i->src(s)).id + 1);
300   }
301}
302
303void
304CodeEmitterNV50::setImmediate(const Instruction *i, int s)
305{
306   const ImmediateValue *imm = i->src(s).get()->asImm();
307   assert(imm);
308
309   uint32_t u = imm->reg.data.u32;
310
311   if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
312      u = ~u;
313
314   code[1] |= 3;
315   code[0] |= (u & 0x3f) << 16;
316   code[1] |= (u >> 6) << 2;
317}
318
319void
320CodeEmitterNV50::setDst(const Value *dst)
321{
322   const Storage *reg = &dst->join->reg;
323
324   assert(reg->file != FILE_ADDRESS);
325
326   if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
327      code[0] |= (127 << 2) | 1;
328      code[1] |= 8;
329   } else {
330      int id;
331      if (reg->file == FILE_SHADER_OUTPUT) {
332         code[1] |= 8;
333         id = reg->data.offset / 4;
334      } else {
335         id = reg->data.id;
336      }
337      code[0] |= id << 2;
338   }
339}
340
341void
342CodeEmitterNV50::setDst(const Instruction *i, int d)
343{
344   if (i->defExists(d)) {
345      setDst(i->getDef(d));
346   } else
347   if (!d) {
348      code[0] |= 0x01fc; // bit bucket
349      code[1] |= 0x0008;
350   }
351}
352
353// 3 * 2 bits:
354// 0: r
355// 1: a/s
356// 2: c
357// 3: i
358void
359CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
360{
361   uint8_t mode = 0;
362
363   for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
364      switch (i->src(s).getFile()) {
365      case FILE_GPR:
366         break;
367      case FILE_MEMORY_SHARED:
368      case FILE_SHADER_INPUT:
369         mode |= 1 << (s * 2);
370         break;
371      case FILE_MEMORY_CONST:
372         mode |= 2 << (s * 2);
373         break;
374      case FILE_IMMEDIATE:
375         mode |= 3 << (s * 2);
376         break;
377      default:
378         ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
379         assert(0);
380         break;
381      }
382   }
383   switch (mode) {
384   case 0x00: // rrr
385      break;
386   case 0x01: // arr/grr
387      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
388         code[0] |= 0x01800000;
389         if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
390            code[1] |= 0x00200000;
391      } else {
392         if (enc == NV50_OP_ENC_SHORT)
393            code[0] |= 0x01000000;
394         else
395            code[1] |= 0x00200000;
396      }
397      break;
398   case 0x03: // irr
399      assert(i->op == OP_MOV);
400      return;
401   case 0x0c: // rir
402      break;
403   case 0x0d: // gir
404      assert(progType == Program::TYPE_GEOMETRY ||
405             progType == Program::TYPE_COMPUTE);
406      code[0] |= 0x01000000;
407      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
408         int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
409         assert(reg < 3);
410         code[0] |= (reg + 1) << 26;
411      }
412      break;
413   case 0x08: // rcr
414      code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
415      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
416      break;
417   case 0x09: // acr/gcr
418      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
419         code[0] |= 0x01800000;
420      } else {
421         code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
422         code[1] |= 0x00200000;
423      }
424      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
425      break;
426   case 0x20: // rrc
427      code[0] |= 0x01000000;
428      code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
429      break;
430   case 0x21: // arc
431      code[0] |= 0x01000000;
432      code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
433      assert(progType != Program::TYPE_GEOMETRY);
434      break;
435   default:
436      ERROR("not encodable: %x\n", mode);
437      assert(0);
438      break;
439   }
440   if (progType != Program::TYPE_COMPUTE)
441      return;
442
443   if ((mode & 3) == 1) {
444      const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
445
446      switch (i->sType) {
447      case TYPE_U8:
448         break;
449      case TYPE_U16:
450         code[0] |= 1 << pos;
451         break;
452      case TYPE_S16:
453         code[0] |= 2 << pos;
454         break;
455      default:
456         code[0] |= 3 << pos;
457         assert(i->getSrc(0)->reg.size == 4);
458         break;
459      }
460   }
461}
462
463void
464CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
465{
466   if (Target::operationSrcNr[i->op] <= s)
467      return;
468   const Storage *reg = &i->src(s).rep()->reg;
469
470   unsigned int id = (reg->file == FILE_GPR) ?
471      reg->data.id :
472      reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
473
474   switch (slot) {
475   case 0: code[0] |= id << 9; break;
476   case 1: code[0] |= id << 16; break;
477   case 2: code[1] |= id << 14; break;
478   default:
479      assert(0);
480      break;
481   }
482}
483
484// the default form:
485//  - long instruction
486//  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
487//  - address & flags
488void
489CodeEmitterNV50::emitForm_MAD(const Instruction *i)
490{
491   assert(i->encSize == 8);
492   code[0] |= 1;
493
494   emitFlagsRd(i);
495   emitFlagsWr(i);
496
497   setDst(i, 0);
498
499   setSrcFileBits(i, NV50_OP_ENC_LONG);
500   setSrc(i, 0, 0);
501   setSrc(i, 1, 1);
502   setSrc(i, 2, 2);
503
504   if (i->getIndirect(0, 0)) {
505      assert(!i->srcExists(1) || !i->getIndirect(1, 0));
506      assert(!i->srcExists(2) || !i->getIndirect(2, 0));
507      setAReg16(i, 0);
508   } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
509      assert(!i->srcExists(2) || !i->getIndirect(2, 0));
510      setAReg16(i, 1);
511   } else {
512      setAReg16(i, 2);
513   }
514}
515
516// like default form, but 2nd source in slot 2, and no 3rd source
517void
518CodeEmitterNV50::emitForm_ADD(const Instruction *i)
519{
520   assert(i->encSize == 8);
521   code[0] |= 1;
522
523   emitFlagsRd(i);
524   emitFlagsWr(i);
525
526   setDst(i, 0);
527
528   setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
529   setSrc(i, 0, 0);
530   if (i->predSrc != 1)
531      setSrc(i, 1, 2);
532
533   if (i->getIndirect(0, 0)) {
534      assert(!i->getIndirect(1, 0));
535      setAReg16(i, 0);
536   } else {
537      setAReg16(i, 1);
538   }
539}
540
541// default short form (rr, ar, rc, gr)
542void
543CodeEmitterNV50::emitForm_MUL(const Instruction *i)
544{
545   assert(i->encSize == 4 && !(code[0] & 1));
546   assert(i->defExists(0));
547   assert(!i->getPredicate());
548
549   setDst(i, 0);
550
551   setSrcFileBits(i, NV50_OP_ENC_SHORT);
552   setSrc(i, 0, 0);
553   setSrc(i, 1, 1);
554}
555
556// usual immediate form
557// - 1 to 3 sources where second is immediate (rir, gir)
558// - no address or predicate possible
559void
560CodeEmitterNV50::emitForm_IMM(const Instruction *i)
561{
562   assert(i->encSize == 8);
563   code[0] |= 1;
564
565   assert(i->defExists(0) && i->srcExists(0));
566
567   setDst(i, 0);
568
569   setSrcFileBits(i, NV50_OP_ENC_IMM);
570   if (Target::operationSrcNr[i->op] > 1) {
571      setSrc(i, 0, 0);
572      setImmediate(i, 1);
573      // If there is another source, it has to be the same as the dest reg.
574   } else {
575      setImmediate(i, 0);
576   }
577}
578
579void
580CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
581{
582   uint8_t enc;
583
584   switch (ty) {
585   case TYPE_F32: // fall through
586   case TYPE_S32: // fall through
587   case TYPE_U32:  enc = 0x6; break;
588   case TYPE_B128: enc = 0x5; break;
589   case TYPE_F64: // fall through
590   case TYPE_S64: // fall through
591   case TYPE_U64:  enc = 0x4; break;
592   case TYPE_S16:  enc = 0x3; break;
593   case TYPE_U16:  enc = 0x2; break;
594   case TYPE_S8:   enc = 0x1; break;
595   case TYPE_U8:   enc = 0x0; break;
596   default:
597      enc = 0;
598      assert(!"invalid load/store type");
599      break;
600   }
601   code[pos / 32] |= enc << (pos % 32);
602}
603
604void
605CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
606{
607   switch (ty) {
608   case TYPE_U8: break;
609   case TYPE_U16: code[1] |= 0x4000; break;
610   case TYPE_S16: code[1] |= 0x8000; break;
611   case TYPE_F32:
612   case TYPE_S32:
613   case TYPE_U32: code[1] |= 0xc000; break;
614   default:
615      assert(0);
616      break;
617   }
618}
619
620void
621CodeEmitterNV50::emitLOAD(const Instruction *i)
622{
623   DataFile sf = i->src(0).getFile();
624   int32_t offset = i->getSrc(0)->reg.data.offset;
625
626   switch (sf) {
627   case FILE_SHADER_INPUT:
628      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
629         code[0] = 0x11800001;
630      else
631         // use 'mov' where we can
632         code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
633      code[1] = 0x00200000 | (i->lanes << 14);
634      if (typeSizeof(i->dType) == 4)
635         code[1] |= 0x04000000;
636      break;
637   case FILE_MEMORY_SHARED:
638      if (targ->getChipset() >= 0x84) {
639         assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
640         code[0] = 0x10000001;
641         code[1] = 0x40000000;
642
643         if (typeSizeof(i->dType) == 4)
644            code[1] |= 0x04000000;
645
646         emitLoadStoreSizeCS(i->sType);
647      } else {
648         assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
649         code[0] = 0x10000001;
650         code[1] = 0x00200000 | (i->lanes << 14);
651         emitLoadStoreSizeCS(i->sType);
652      }
653      break;
654   case FILE_MEMORY_CONST:
655      code[0] = 0x10000001;
656      code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
657      if (typeSizeof(i->dType) == 4)
658         code[1] |= 0x04000000;
659      emitLoadStoreSizeCS(i->sType);
660      break;
661   case FILE_MEMORY_LOCAL:
662      code[0] = 0xd0000001;
663      code[1] = 0x40000000;
664      break;
665   case FILE_MEMORY_GLOBAL:
666      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
667      code[1] = 0x80000000;
668      break;
669   default:
670      assert(!"invalid load source file");
671      break;
672   }
673   if (sf == FILE_MEMORY_LOCAL ||
674       sf == FILE_MEMORY_GLOBAL)
675      emitLoadStoreSizeLG(i->sType, 21 + 32);
676
677   setDst(i, 0);
678
679   emitFlagsRd(i);
680   emitFlagsWr(i);
681
682   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
683      srcId(*i->src(0).getIndirect(0), 9);
684   } else {
685      setAReg16(i, 0);
686      srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
687   }
688}
689
690void
691CodeEmitterNV50::emitSTORE(const Instruction *i)
692{
693   DataFile f = i->getSrc(0)->reg.file;
694   int32_t offset = i->getSrc(0)->reg.data.offset;
695
696   switch (f) {
697   case FILE_SHADER_OUTPUT:
698      code[0] = 0x00000001 | ((offset >> 2) << 9);
699      code[1] = 0x80c00000;
700      srcId(i->src(1), 32 + 14);
701      break;
702   case FILE_MEMORY_GLOBAL:
703      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
704      code[1] = 0xa0000000;
705      emitLoadStoreSizeLG(i->dType, 21 + 32);
706      srcId(i->src(1), 2);
707      break;
708   case FILE_MEMORY_LOCAL:
709      code[0] = 0xd0000001;
710      code[1] = 0x60000000;
711      emitLoadStoreSizeLG(i->dType, 21 + 32);
712      srcId(i->src(1), 2);
713      break;
714   case FILE_MEMORY_SHARED:
715      code[0] = 0x00000001;
716      code[1] = 0xe0000000;
717      switch (typeSizeof(i->dType)) {
718      case 1:
719         code[0] |= offset << 9;
720         code[1] |= 0x00400000;
721         break;
722      case 2:
723         code[0] |= (offset >> 1) << 9;
724         break;
725      case 4:
726         code[0] |= (offset >> 2) << 9;
727         code[1] |= 0x04200000;
728         break;
729      default:
730         assert(0);
731         break;
732      }
733      srcId(i->src(1), 32 + 14);
734      break;
735   default:
736      assert(!"invalid store destination file");
737      break;
738   }
739
740   if (f == FILE_MEMORY_GLOBAL)
741      srcId(*i->src(0).getIndirect(0), 9);
742   else
743      setAReg16(i, 0);
744
745   if (f == FILE_MEMORY_LOCAL)
746      srcAddr16(i->src(0), false, 9);
747
748   emitFlagsRd(i);
749}
750
751void
752CodeEmitterNV50::emitMOV(const Instruction *i)
753{
754   DataFile sf = i->getSrc(0)->reg.file;
755   DataFile df = i->getDef(0)->reg.file;
756
757   assert(sf == FILE_GPR || df == FILE_GPR);
758
759   if (sf == FILE_FLAGS) {
760      assert(i->flagsSrc >= 0);
761      code[0] = 0x00000001;
762      code[1] = 0x20000000;
763      defId(i->def(0), 2);
764      emitFlagsRd(i);
765   } else
766   if (sf == FILE_ADDRESS) {
767      code[0] = 0x00000001;
768      code[1] = 0x40000000;
769      defId(i->def(0), 2);
770      setARegBits(SDATA(i->src(0)).id + 1);
771      emitFlagsRd(i);
772   } else
773   if (df == FILE_FLAGS) {
774      assert(i->flagsDef >= 0);
775      code[0] = 0x00000001;
776      code[1] = 0xa0000000;
777      srcId(i->src(0), 9);
778      emitFlagsRd(i);
779      emitFlagsWr(i);
780   } else
781   if (sf == FILE_IMMEDIATE) {
782      code[0] = 0x10008001;
783      code[1] = 0x00000003;
784      emitForm_IMM(i);
785   } else {
786      if (i->encSize == 4) {
787         code[0] = 0x10008000;
788      } else {
789         code[0] = 0x10000001;
790         code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
791         code[1] |= (i->lanes << 14);
792         emitFlagsRd(i);
793      }
794      defId(i->def(0), 2);
795      srcId(i->src(0), 9);
796   }
797   if (df == FILE_SHADER_OUTPUT) {
798      assert(i->encSize == 8);
799      code[1] |= 0x8;
800   }
801}
802
803static inline uint8_t getSRegEncoding(const ValueRef &ref)
804{
805   switch (SDATA(ref).sv.sv) {
806   case SV_PHYSID:        return 0;
807   case SV_CLOCK:         return 1;
808   case SV_VERTEX_STRIDE: return 3;
809// case SV_PM_COUNTER:    return 4 + SDATA(ref).sv.index;
810   case SV_SAMPLE_INDEX:  return 8;
811   default:
812      assert(!"no sreg for system value");
813      return 0;
814   }
815}
816
817void
818CodeEmitterNV50::emitRDSV(const Instruction *i)
819{
820   code[0] = 0x00000001;
821   code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
822   defId(i->def(0), 2);
823   emitFlagsRd(i);
824}
825
826void
827CodeEmitterNV50::emitNOP()
828{
829   code[0] = 0xf0000001;
830   code[1] = 0xe0000000;
831}
832
833void
834CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
835{
836   code[0] = 0xc0000000 | (lane << 16);
837   code[1] = 0x80000000;
838
839   code[0] |= (quOp & 0x03) << 20;
840   code[1] |= (quOp & 0xfc) << 20;
841
842   emitForm_ADD(i);
843
844   if (!i->srcExists(1) || i->predSrc == 1)
845      srcId(i->src(0), 32 + 14);
846}
847
848/* NOTE: This returns the base address of a vertex inside the primitive.
849 * src0 is an immediate, the index (not offset) of the vertex
850 * inside the primitive. XXX: signed or unsigned ?
851 * src1 (may be NULL) should use whatever units the hardware requires
852 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
853 */
854void
855CodeEmitterNV50::emitPFETCH(const Instruction *i)
856{
857   const uint32_t prim = i->src(0).get()->reg.data.u32;
858   assert(prim <= 127);
859
860   if (i->def(0).getFile() == FILE_ADDRESS) {
861      // shl $aX a[] 0
862      code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
863      code[1] = 0xc0200000;
864      code[0] |= prim << 9;
865      assert(!i->srcExists(1));
866   } else
867   if (i->srcExists(1)) {
868      // ld b32 $rX a[$aX+base]
869      code[0] = 0x00000001;
870      code[1] = 0x04200000 | (0xf << 14);
871      defId(i->def(0), 2);
872      code[0] |= prim << 9;
873      setARegBits(SDATA(i->src(1)).id + 1);
874   } else {
875      // mov b32 $rX a[]
876      code[0] = 0x10000001;
877      code[1] = 0x04200000 | (0xf << 14);
878      defId(i->def(0), 2);
879      code[0] |= prim << 9;
880   }
881   emitFlagsRd(i);
882}
883
884static void
885interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
886{
887   int ipa = entry->ipa;
888   int encSize = entry->reg;
889   int loc = entry->loc;
890
891   if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
892       (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
893      if (data.force_persample_interp) {
894         if (encSize == 8)
895            code[loc + 1] |= 1 << 16;
896         else
897            code[loc + 0] |= 1 << 24;
898      } else {
899         if (encSize == 8)
900            code[loc + 1] &= ~(1 << 16);
901         else
902            code[loc + 0] &= ~(1 << 24);
903      }
904   }
905}
906
907void
908CodeEmitterNV50::emitINTERP(const Instruction *i)
909{
910   code[0] = 0x80000000;
911
912   defId(i->def(0), 2);
913   srcAddr8(i->src(0), 16);
914   setAReg16(i, 0);
915
916   if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
917      code[0] |= 1 << 8;
918   } else {
919      if (i->op == OP_PINTERP) {
920         code[0] |= 1 << 25;
921         srcId(i->src(1), 9);
922      }
923      if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
924         code[0] |= 1 << 24;
925   }
926
927   if (i->encSize == 8) {
928      if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
929         code[1] = 4 << 16;
930      else
931         code[1] = (code[0] & (3 << 24)) >> (24 - 16);
932      code[0] &= ~0x03000000;
933      code[0] |= 1;
934      emitFlagsRd(i);
935   }
936
937   addInterp(i->ipa, i->encSize, interpApply);
938}
939
940void
941CodeEmitterNV50::emitMINMAX(const Instruction *i)
942{
943   if (i->dType == TYPE_F64) {
944      code[0] = 0xe0000000;
945      code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
946   } else {
947      code[0] = 0x30000000;
948      code[1] = 0x80000000;
949      if (i->op == OP_MIN)
950         code[1] |= 0x20000000;
951
952      switch (i->dType) {
953      case TYPE_F32: code[0] |= 0x80000000; break;
954      case TYPE_S32: code[1] |= 0x8c000000; break;
955      case TYPE_U32: code[1] |= 0x84000000; break;
956      case TYPE_S16: code[1] |= 0x80000000; break;
957      case TYPE_U16: break;
958      default:
959         assert(0);
960         break;
961      }
962   }
963
964   code[1] |= i->src(0).mod.abs() << 20;
965   code[1] |= i->src(0).mod.neg() << 26;
966   code[1] |= i->src(1).mod.abs() << 19;
967   code[1] |= i->src(1).mod.neg() << 27;
968
969   emitForm_MAD(i);
970}
971
972void
973CodeEmitterNV50::emitFMAD(const Instruction *i)
974{
975   const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
976   const int neg_add = i->src(2).mod.neg();
977
978   code[0] = 0xe0000000;
979
980   if (i->src(1).getFile() == FILE_IMMEDIATE) {
981      code[1] = 0;
982      emitForm_IMM(i);
983      code[0] |= neg_mul << 15;
984      code[0] |= neg_add << 22;
985      if (i->saturate)
986         code[0] |= 1 << 8;
987   } else
988   if (i->encSize == 4) {
989      emitForm_MUL(i);
990      code[0] |= neg_mul << 15;
991      code[0] |= neg_add << 22;
992      if (i->saturate)
993         code[0] |= 1 << 8;
994   } else {
995      code[1]  = neg_mul << 26;
996      code[1] |= neg_add << 27;
997      if (i->saturate)
998         code[1] |= 1 << 29;
999      emitForm_MAD(i);
1000   }
1001}
1002
1003void
1004CodeEmitterNV50::emitDMAD(const Instruction *i)
1005{
1006   const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1007   const int neg_add = i->src(2).mod.neg();
1008
1009   assert(i->encSize == 8);
1010   assert(!i->saturate);
1011
1012   code[1] = 0x40000000;
1013   code[0] = 0xe0000000;
1014
1015   code[1] |= neg_mul << 26;
1016   code[1] |= neg_add << 27;
1017
1018   roundMode_MAD(i);
1019
1020   emitForm_MAD(i);
1021}
1022
1023void
1024CodeEmitterNV50::emitFADD(const Instruction *i)
1025{
1026   const int neg0 = i->src(0).mod.neg();
1027   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1028
1029   code[0] = 0xb0000000;
1030
1031   assert(!(i->src(0).mod | i->src(1).mod).abs());
1032
1033   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1034      code[1] = 0;
1035      emitForm_IMM(i);
1036      code[0] |= neg0 << 15;
1037      code[0] |= neg1 << 22;
1038      if (i->saturate)
1039         code[0] |= 1 << 8;
1040   } else
1041   if (i->encSize == 8) {
1042      code[1] = 0;
1043      emitForm_ADD(i);
1044      code[1] |= neg0 << 26;
1045      code[1] |= neg1 << 27;
1046      if (i->saturate)
1047         code[1] |= 1 << 29;
1048   } else {
1049      emitForm_MUL(i);
1050      code[0] |= neg0 << 15;
1051      code[0] |= neg1 << 22;
1052      if (i->saturate)
1053         code[0] |= 1 << 8;
1054   }
1055}
1056
1057void
1058CodeEmitterNV50::emitDADD(const Instruction *i)
1059{
1060   const int neg0 = i->src(0).mod.neg();
1061   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1062
1063   assert(!(i->src(0).mod | i->src(1).mod).abs());
1064   assert(!i->saturate);
1065   assert(i->encSize == 8);
1066
1067   code[1] = 0x60000000;
1068   code[0] = 0xe0000000;
1069
1070   emitForm_ADD(i);
1071
1072   code[1] |= neg0 << 26;
1073   code[1] |= neg1 << 27;
1074}
1075
1076void
1077CodeEmitterNV50::emitUADD(const Instruction *i)
1078{
1079   const int neg0 = i->src(0).mod.neg();
1080   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1081
1082   code[0] = 0x20008000;
1083
1084   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1085      code[1] = 0;
1086      emitForm_IMM(i);
1087   } else
1088   if (i->encSize == 8) {
1089      code[0] = 0x20000000;
1090      code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1091      emitForm_ADD(i);
1092   } else {
1093      emitForm_MUL(i);
1094   }
1095   assert(!(neg0 && neg1));
1096   code[0] |= neg0 << 28;
1097   code[0] |= neg1 << 22;
1098
1099   if (i->flagsSrc >= 0) {
1100      // addc == sub | subr
1101      assert(!(code[0] & 0x10400000) && !i->getPredicate());
1102      code[0] |= 0x10400000;
1103      srcId(i->src(i->flagsSrc), 32 + 12);
1104   }
1105}
1106
1107void
1108CodeEmitterNV50::emitAADD(const Instruction *i)
1109{
1110   const int s = (i->op == OP_MOV) ? 0 : 1;
1111
1112   code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1113   code[1] = 0x20000000;
1114
1115   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1116
1117   emitFlagsRd(i);
1118
1119   if (s && i->srcExists(0))
1120      setARegBits(SDATA(i->src(0)).id + 1);
1121}
1122
1123void
1124CodeEmitterNV50::emitIMUL(const Instruction *i)
1125{
1126   code[0] = 0x40000000;
1127
1128   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1129      if (i->sType == TYPE_S16)
1130         code[0] |= 0x8100;
1131      code[1] = 0;
1132      emitForm_IMM(i);
1133   } else
1134   if (i->encSize == 8) {
1135      code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1136      emitForm_MAD(i);
1137   } else {
1138      if (i->sType == TYPE_S16)
1139         code[0] |= 0x8100;
1140      emitForm_MUL(i);
1141   }
1142}
1143
1144void
1145CodeEmitterNV50::emitFMUL(const Instruction *i)
1146{
1147   const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1148
1149   code[0] = 0xc0000000;
1150
1151   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1152      code[1] = 0;
1153      emitForm_IMM(i);
1154      if (neg)
1155         code[0] |= 0x8000;
1156      if (i->saturate)
1157         code[0] |= 1 << 8;
1158   } else
1159   if (i->encSize == 8) {
1160      code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1161      if (neg)
1162         code[1] |= 0x08000000;
1163      if (i->saturate)
1164         code[1] |= 1 << 20;
1165      emitForm_MAD(i);
1166   } else {
1167      emitForm_MUL(i);
1168      if (neg)
1169         code[0] |= 0x8000;
1170      if (i->saturate)
1171         code[0] |= 1 << 8;
1172   }
1173}
1174
1175void
1176CodeEmitterNV50::emitDMUL(const Instruction *i)
1177{
1178   const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1179
1180   assert(!i->saturate);
1181   assert(i->encSize == 8);
1182
1183   code[1] = 0x80000000;
1184   code[0] = 0xe0000000;
1185
1186   if (neg)
1187      code[1] |= 0x08000000;
1188
1189   roundMode_CVT(i->rnd);
1190
1191   emitForm_MAD(i);
1192}
1193
1194void
1195CodeEmitterNV50::emitIMAD(const Instruction *i)
1196{
1197   int mode;
1198   code[0] = 0x60000000;
1199
1200   assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
1201   if (!isSignedType(i->sType))
1202      mode = 0;
1203   else if (i->saturate)
1204      mode = 2;
1205   else
1206      mode = 1;
1207
1208   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1209      code[1] = 0;
1210      emitForm_IMM(i);
1211      code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1212      if (i->flagsSrc >= 0) {
1213         assert(!(code[0] & 0x10400000));
1214         assert(SDATA(i->src(i->flagsSrc)).id == 0);
1215         code[0] |= 0x10400000;
1216      }
1217   } else
1218   if (i->encSize == 4) {
1219      emitForm_MUL(i);
1220      code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1221      if (i->flagsSrc >= 0) {
1222         assert(!(code[0] & 0x10400000));
1223         assert(SDATA(i->src(i->flagsSrc)).id == 0);
1224         code[0] |= 0x10400000;
1225      }
1226   } else {
1227      code[1] = mode << 29;
1228      emitForm_MAD(i);
1229
1230      if (i->flagsSrc >= 0) {
1231         // add with carry from $cX
1232         assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1233         code[1] |= 0xc << 24;
1234         srcId(i->src(i->flagsSrc), 32 + 12);
1235      }
1236   }
1237}
1238
1239void
1240CodeEmitterNV50::emitISAD(const Instruction *i)
1241{
1242   if (i->encSize == 8) {
1243      code[0] = 0x50000000;
1244      switch (i->sType) {
1245      case TYPE_U32: code[1] = 0x04000000; break;
1246      case TYPE_S32: code[1] = 0x0c000000; break;
1247      case TYPE_U16: code[1] = 0x00000000; break;
1248      case TYPE_S16: code[1] = 0x08000000; break;
1249      default:
1250         assert(0);
1251         break;
1252      }
1253      emitForm_MAD(i);
1254   } else {
1255      switch (i->sType) {
1256      case TYPE_U32: code[0] = 0x50008000; break;
1257      case TYPE_S32: code[0] = 0x50008100; break;
1258      case TYPE_U16: code[0] = 0x50000000; break;
1259      case TYPE_S16: code[0] = 0x50000100; break;
1260      default:
1261         assert(0);
1262         break;
1263      }
1264      emitForm_MUL(i);
1265   }
1266}
1267
1268static void
1269alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1270{
1271   int loc = entry->loc;
1272   int enc;
1273
1274   switch (data.alphatest) {
1275   case PIPE_FUNC_NEVER: enc = 0x0; break;
1276   case PIPE_FUNC_LESS: enc = 0x1; break;
1277   case PIPE_FUNC_EQUAL: enc = 0x2; break;
1278   case PIPE_FUNC_LEQUAL: enc = 0x3; break;
1279   case PIPE_FUNC_GREATER: enc = 0x4; break;
1280   case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
1281   case PIPE_FUNC_GEQUAL: enc = 0x6; break;
1282   default:
1283   case PIPE_FUNC_ALWAYS: enc = 0xf; break;
1284   }
1285
1286   code[loc + 1] &= ~(0x1f << 14);
1287   code[loc + 1] |= enc << 14;
1288}
1289
1290void
1291CodeEmitterNV50::emitSET(const Instruction *i)
1292{
1293   code[0] = 0x30000000;
1294   code[1] = 0x60000000;
1295
1296   switch (i->sType) {
1297   case TYPE_F64:
1298      code[0] = 0xe0000000;
1299      code[1] = 0xe0000000;
1300      break;
1301   case TYPE_F32: code[0] |= 0x80000000; break;
1302   case TYPE_S32: code[1] |= 0x0c000000; break;
1303   case TYPE_U32: code[1] |= 0x04000000; break;
1304   case TYPE_S16: code[1] |= 0x08000000; break;
1305   case TYPE_U16: break;
1306   default:
1307      assert(0);
1308      break;
1309   }
1310
1311   emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1312
1313   if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1314   if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1315   if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1316   if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1317
1318   emitForm_MAD(i);
1319
1320   if (i->subOp == 1) {
1321      addInterp(0, 0, alphatestSet);
1322   }
1323}
1324
1325void
1326CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1327{
1328   switch (rnd) {
1329   case ROUND_NI: code[1] |= 0x08000000; break;
1330   case ROUND_M:  code[1] |= 0x00020000; break;
1331   case ROUND_MI: code[1] |= 0x08020000; break;
1332   case ROUND_P:  code[1] |= 0x00040000; break;
1333   case ROUND_PI: code[1] |= 0x08040000; break;
1334   case ROUND_Z:  code[1] |= 0x00060000; break;
1335   case ROUND_ZI: code[1] |= 0x08060000; break;
1336   default:
1337      assert(rnd == ROUND_N);
1338      break;
1339   }
1340}
1341
1342void
1343CodeEmitterNV50::emitCVT(const Instruction *i)
1344{
1345   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1346   RoundMode rnd;
1347   DataType dType;
1348
1349   switch (i->op) {
1350   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1351   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1352   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1353   default:
1354      rnd = i->rnd;
1355      break;
1356   }
1357
1358   if (i->op == OP_NEG && i->dType == TYPE_U32)
1359      dType = TYPE_S32;
1360   else
1361      dType = i->dType;
1362
1363   code[0] = 0xa0000000;
1364
1365   switch (dType) {
1366   case TYPE_F64:
1367      switch (i->sType) {
1368      case TYPE_F64: code[1] = 0xc4404000; break;
1369      case TYPE_S64: code[1] = 0x44414000; break;
1370      case TYPE_U64: code[1] = 0x44404000; break;
1371      case TYPE_F32: code[1] = 0xc4400000; break;
1372      case TYPE_S32: code[1] = 0x44410000; break;
1373      case TYPE_U32: code[1] = 0x44400000; break;
1374      default:
1375         assert(0);
1376         break;
1377      }
1378      break;
1379   case TYPE_S64:
1380      switch (i->sType) {
1381      case TYPE_F64: code[1] = 0x8c404000; break;
1382      case TYPE_F32: code[1] = 0x8c400000; break;
1383      default:
1384         assert(0);
1385         break;
1386      }
1387      break;
1388   case TYPE_U64:
1389      switch (i->sType) {
1390      case TYPE_F64: code[1] = 0x84404000; break;
1391      case TYPE_F32: code[1] = 0x84400000; break;
1392      default:
1393         assert(0);
1394         break;
1395      }
1396      break;
1397   case TYPE_F32:
1398      switch (i->sType) {
1399      case TYPE_F64: code[1] = 0xc0404000; break;
1400      case TYPE_S64: code[1] = 0x40414000; break;
1401      case TYPE_U64: code[1] = 0x40404000; break;
1402      case TYPE_F32: code[1] = 0xc4004000; break;
1403      case TYPE_S32: code[1] = 0x44014000; break;
1404      case TYPE_U32: code[1] = 0x44004000; break;
1405      case TYPE_F16: code[1] = 0xc4000000; break;
1406      case TYPE_U16: code[1] = 0x44000000; break;
1407      default:
1408         assert(0);
1409         break;
1410      }
1411      break;
1412   case TYPE_S32:
1413      switch (i->sType) {
1414      case TYPE_F64: code[1] = 0x88404000; break;
1415      case TYPE_F32: code[1] = 0x8c004000; break;
1416      case TYPE_S32: code[1] = 0x0c014000; break;
1417      case TYPE_U32: code[1] = 0x0c004000; break;
1418      case TYPE_F16: code[1] = 0x8c000000; break;
1419      case TYPE_S16: code[1] = 0x0c010000; break;
1420      case TYPE_U16: code[1] = 0x0c000000; break;
1421      case TYPE_S8:  code[1] = 0x0c018000; break;
1422      case TYPE_U8:  code[1] = 0x0c008000; break;
1423      default:
1424         assert(0);
1425         break;
1426      }
1427      break;
1428   case TYPE_U32:
1429      switch (i->sType) {
1430      case TYPE_F64: code[1] = 0x80404000; break;
1431      case TYPE_F32: code[1] = 0x84004000; break;
1432      case TYPE_S32: code[1] = 0x04014000; break;
1433      case TYPE_U32: code[1] = 0x04004000; break;
1434      case TYPE_F16: code[1] = 0x84000000; break;
1435      case TYPE_S16: code[1] = 0x04010000; break;
1436      case TYPE_U16: code[1] = 0x04000000; break;
1437      case TYPE_S8:  code[1] = 0x04018000; break;
1438      case TYPE_U8:  code[1] = 0x04008000; break;
1439      default:
1440         assert(0);
1441         break;
1442      }
1443      break;
1444   case TYPE_S16:
1445   case TYPE_U16:
1446   case TYPE_S8:
1447   case TYPE_U8:
1448   default:
1449      assert(0);
1450      break;
1451   }
1452   if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1453      code[1] |= 0x00004000;
1454
1455   roundMode_CVT(rnd);
1456
1457   switch (i->op) {
1458   case OP_ABS: code[1] |= 1 << 20; break;
1459   case OP_SAT: code[1] |= 1 << 19; break;
1460   case OP_NEG: code[1] |= 1 << 29; break;
1461   default:
1462      break;
1463   }
1464   code[1] ^= i->src(0).mod.neg() << 29;
1465   code[1] |= i->src(0).mod.abs() << 20;
1466   if (i->saturate)
1467      code[1] |= 1 << 19;
1468
1469   assert(i->op != OP_ABS || !i->src(0).mod.neg());
1470
1471   emitForm_MAD(i);
1472}
1473
1474void
1475CodeEmitterNV50::emitPreOp(const Instruction *i)
1476{
1477   code[0] = 0xb0000000;
1478   code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1479
1480   code[1] |= i->src(0).mod.abs() << 20;
1481   code[1] |= i->src(0).mod.neg() << 26;
1482
1483   emitForm_MAD(i);
1484}
1485
1486void
1487CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1488{
1489   code[0] = 0x90000000;
1490
1491   if (i->encSize == 4) {
1492      assert(i->op == OP_RCP);
1493      assert(!i->saturate);
1494      code[0] |= i->src(0).mod.abs() << 15;
1495      code[0] |= i->src(0).mod.neg() << 22;
1496      emitForm_MUL(i);
1497   } else {
1498      code[1] = subOp << 29;
1499      code[1] |= i->src(0).mod.abs() << 20;
1500      code[1] |= i->src(0).mod.neg() << 26;
1501      if (i->saturate) {
1502         assert(subOp == 6 && i->op == OP_EX2);
1503         code[1] |= 1 << 27;
1504      }
1505      emitForm_MAD(i);
1506   }
1507}
1508
1509void
1510CodeEmitterNV50::emitNOT(const Instruction *i)
1511{
1512   code[0] = 0xd0000000;
1513   code[1] = 0x0002c000;
1514
1515   switch (i->sType) {
1516   case TYPE_U32:
1517   case TYPE_S32:
1518      code[1] |= 0x04000000;
1519      break;
1520   default:
1521      break;
1522   }
1523   emitForm_MAD(i);
1524   setSrc(i, 0, 1);
1525}
1526
1527void
1528CodeEmitterNV50::emitLogicOp(const Instruction *i)
1529{
1530   code[0] = 0xd0000000;
1531   code[1] = 0;
1532
1533   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1534      switch (i->op) {
1535      case OP_OR:  code[0] |= 0x0100; break;
1536      case OP_XOR: code[0] |= 0x8000; break;
1537      default:
1538         assert(i->op == OP_AND);
1539         break;
1540      }
1541      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1542         code[0] |= 1 << 22;
1543
1544      emitForm_IMM(i);
1545   } else {
1546      switch (i->op) {
1547      case OP_AND: code[1] = 0x04000000; break;
1548      case OP_OR:  code[1] = 0x04004000; break;
1549      case OP_XOR: code[1] = 0x04008000; break;
1550      default:
1551         assert(0);
1552         break;
1553      }
1554      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1555         code[1] |= 1 << 16;
1556      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1557         code[1] |= 1 << 17;
1558
1559      emitForm_MAD(i);
1560   }
1561}
1562
1563void
1564CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1565{
1566   code[0] = 0x00000001 | (shl << 16);
1567   code[1] = 0xc0000000;
1568
1569   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1570
1571   setSrcFileBits(i, NV50_OP_ENC_IMM);
1572   setSrc(i, 0, 0);
1573   emitFlagsRd(i);
1574}
1575
1576void
1577CodeEmitterNV50::emitShift(const Instruction *i)
1578{
1579   if (i->def(0).getFile() == FILE_ADDRESS) {
1580      assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1581      emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1582   } else {
1583      code[0] = 0x30000001;
1584      code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1585      if (i->op == OP_SHR && isSignedType(i->sType))
1586          code[1] |= 1 << 27;
1587
1588      if (i->src(1).getFile() == FILE_IMMEDIATE) {
1589         code[1] |= 1 << 20;
1590         code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1591         defId(i->def(0), 2);
1592         srcId(i->src(0), 9);
1593         emitFlagsRd(i);
1594      } else {
1595         emitForm_MAD(i);
1596      }
1597   }
1598}
1599
1600void
1601CodeEmitterNV50::emitOUT(const Instruction *i)
1602{
1603   code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1604   code[1] = 0xc0000000;
1605
1606   emitFlagsRd(i);
1607}
1608
1609void
1610CodeEmitterNV50::emitTEX(const TexInstruction *i)
1611{
1612   code[0] = 0xf0000001;
1613   code[1] = 0x00000000;
1614
1615   switch (i->op) {
1616   case OP_TXB:
1617      code[1] = 0x20000000;
1618      break;
1619   case OP_TXL:
1620      code[1] = 0x40000000;
1621      break;
1622   case OP_TXF:
1623      code[0] |= 0x01000000;
1624      break;
1625   case OP_TXG:
1626      code[0] |= 0x01000000;
1627      code[1] = 0x80000000;
1628      break;
1629   case OP_TXLQ:
1630      code[1] = 0x60020000;
1631      break;
1632   default:
1633      assert(i->op == OP_TEX);
1634      break;
1635   }
1636
1637   code[0] |= i->tex.r << 9;
1638   code[0] |= i->tex.s << 17;
1639
1640   int argc = i->tex.target.getArgCount();
1641
1642   if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1643      argc += 1;
1644   if (i->tex.target.isShadow())
1645      argc += 1;
1646   assert(argc <= 4);
1647
1648   code[0] |= (argc - 1) << 22;
1649
1650   if (i->tex.target.isCube()) {
1651      code[0] |= 0x08000000;
1652   } else
1653   if (i->tex.useOffsets) {
1654      code[1] |= (i->tex.offset[0] & 0xf) << 24;
1655      code[1] |= (i->tex.offset[1] & 0xf) << 20;
1656      code[1] |= (i->tex.offset[2] & 0xf) << 16;
1657   }
1658
1659   code[0] |= (i->tex.mask & 0x3) << 25;
1660   code[1] |= (i->tex.mask & 0xc) << 12;
1661
1662   if (i->tex.liveOnly)
1663      code[1] |= 1 << 2;
1664   if (i->tex.derivAll)
1665      code[1] |= 1 << 3;
1666
1667   defId(i->def(0), 2);
1668
1669   emitFlagsRd(i);
1670}
1671
1672void
1673CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1674{
1675   assert(i->tex.query == TXQ_DIMS);
1676
1677   code[0] = 0xf0000001;
1678   code[1] = 0x60000000;
1679
1680   code[0] |= i->tex.r << 9;
1681   code[0] |= i->tex.s << 17;
1682
1683   code[0] |= (i->tex.mask & 0x3) << 25;
1684   code[1] |= (i->tex.mask & 0xc) << 12;
1685
1686   defId(i->def(0), 2);
1687
1688   emitFlagsRd(i);
1689}
1690
1691void
1692CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1693{
1694   code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1695   code[1] = 0x60010000;
1696
1697   code[0] |= (i->tex.mask & 0x3) << 25;
1698   code[1] |= (i->tex.mask & 0xc) << 12;
1699   defId(i->def(0), 2);
1700
1701   emitFlagsRd(i);
1702}
1703
1704void
1705CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1706{
1707   uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1708
1709   code[0] = 0x10000003; // bra
1710   code[1] = 0x00000780; // always
1711
1712   switch (i->subOp) {
1713   case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1714      break;
1715   case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1716      pos += 8;
1717      break;
1718   default:
1719      assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1720      code[0] = 0x20000003; // call
1721      code[1] = 0x00000000; // no predicate
1722      break;
1723   }
1724   addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1725   addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1726}
1727
1728void
1729CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1730{
1731   const FlowInstruction *f = i->asFlow();
1732   bool hasPred = false;
1733   bool hasTarg = false;
1734
1735   code[0] = 0x00000003 | (flowOp << 28);
1736   code[1] = 0x00000000;
1737
1738   switch (i->op) {
1739   case OP_BRA:
1740      hasPred = true;
1741      hasTarg = true;
1742      break;
1743   case OP_BREAK:
1744   case OP_BRKPT:
1745   case OP_DISCARD:
1746   case OP_RET:
1747      hasPred = true;
1748      break;
1749   case OP_CALL:
1750   case OP_PREBREAK:
1751   case OP_JOINAT:
1752      hasTarg = true;
1753      break;
1754   case OP_PRERET:
1755      hasTarg = true;
1756      if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1757         emitPRERETEmu(f);
1758         return;
1759      }
1760      break;
1761   default:
1762      break;
1763   }
1764
1765   if (hasPred)
1766      emitFlagsRd(i);
1767
1768   if (hasTarg && f) {
1769      uint32_t pos;
1770
1771      if (f->op == OP_CALL) {
1772         if (f->builtin) {
1773            pos = targNV50->getBuiltinOffset(f->target.builtin);
1774         } else {
1775            pos = f->target.fn->binPos;
1776         }
1777      } else {
1778         pos = f->target.bb->binPos;
1779      }
1780
1781      code[0] |= ((pos >>  2) & 0xffff) << 11;
1782      code[1] |= ((pos >> 18) & 0x003f) << 14;
1783
1784      RelocEntry::Type relocTy;
1785
1786      relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1787
1788      addReloc(relocTy, 0, pos, 0x07fff800, 9);
1789      addReloc(relocTy, 1, pos, 0x000fc000, -4);
1790   }
1791}
1792
1793void
1794CodeEmitterNV50::emitBAR(const Instruction *i)
1795{
1796   ImmediateValue *barId = i->getSrc(0)->asImm();
1797   assert(barId);
1798
1799   code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1800   code[1] = 0x00004000;
1801
1802   if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1803      code[0] |= 1 << 26;
1804}
1805
1806void
1807CodeEmitterNV50::emitATOM(const Instruction *i)
1808{
1809   uint8_t subOp;
1810   switch (i->subOp) {
1811   case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
1812   case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
1813   case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
1814   case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
1815   case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
1816   case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
1817   case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
1818   case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
1819   case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
1820   case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1821   default:
1822      assert(!"invalid subop");
1823      return;
1824   }
1825   code[0] = 0xd0000001;
1826   code[1] = 0xe0c00000 | (subOp << 2);
1827   if (isSignedType(i->dType))
1828      code[1] |= 1 << 21;
1829
1830   // args
1831   emitFlagsRd(i);
1832   setDst(i, 0);
1833   setSrc(i, 1, 1);
1834   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1835      setSrc(i, 2, 2);
1836
1837   // g[] pointer
1838   code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1839   srcId(i->getIndirect(0, 0), 9);
1840}
1841
1842bool
1843CodeEmitterNV50::emitInstruction(Instruction *insn)
1844{
1845   if (!insn->encSize) {
1846      ERROR("skipping unencodable instruction: "); insn->print();
1847      return false;
1848   } else
1849   if (codeSize + insn->encSize > codeSizeLimit) {
1850      ERROR("code emitter output buffer too small\n");
1851      return false;
1852   }
1853
1854   if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1855      INFO("EMIT: "); insn->print();
1856   }
1857
1858   switch (insn->op) {
1859   case OP_MOV:
1860      emitMOV(insn);
1861      break;
1862   case OP_EXIT:
1863   case OP_NOP:
1864   case OP_JOIN:
1865      emitNOP();
1866      break;
1867   case OP_VFETCH:
1868   case OP_LOAD:
1869      emitLOAD(insn);
1870      break;
1871   case OP_EXPORT:
1872   case OP_STORE:
1873      emitSTORE(insn);
1874      break;
1875   case OP_PFETCH:
1876      emitPFETCH(insn);
1877      break;
1878   case OP_RDSV:
1879      emitRDSV(insn);
1880      break;
1881   case OP_LINTERP:
1882   case OP_PINTERP:
1883      emitINTERP(insn);
1884      break;
1885   case OP_ADD:
1886   case OP_SUB:
1887      if (insn->dType == TYPE_F64)
1888         emitDADD(insn);
1889      else if (isFloatType(insn->dType))
1890         emitFADD(insn);
1891      else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1892         emitAADD(insn);
1893      else
1894         emitUADD(insn);
1895      break;
1896   case OP_MUL:
1897      if (insn->dType == TYPE_F64)
1898         emitDMUL(insn);
1899      else if (isFloatType(insn->dType))
1900         emitFMUL(insn);
1901      else
1902         emitIMUL(insn);
1903      break;
1904   case OP_MAD:
1905   case OP_FMA:
1906      if (insn->dType == TYPE_F64)
1907         emitDMAD(insn);
1908      else if (isFloatType(insn->dType))
1909         emitFMAD(insn);
1910      else
1911         emitIMAD(insn);
1912      break;
1913   case OP_SAD:
1914      emitISAD(insn);
1915      break;
1916   case OP_NOT:
1917      emitNOT(insn);
1918      break;
1919   case OP_AND:
1920   case OP_OR:
1921   case OP_XOR:
1922      emitLogicOp(insn);
1923      break;
1924   case OP_SHL:
1925   case OP_SHR:
1926      emitShift(insn);
1927      break;
1928   case OP_SET:
1929      emitSET(insn);
1930      break;
1931   case OP_MIN:
1932   case OP_MAX:
1933      emitMINMAX(insn);
1934      break;
1935   case OP_CEIL:
1936   case OP_FLOOR:
1937   case OP_TRUNC:
1938   case OP_ABS:
1939   case OP_NEG:
1940   case OP_SAT:
1941      emitCVT(insn);
1942      break;
1943   case OP_CVT:
1944      if (insn->def(0).getFile() == FILE_ADDRESS)
1945         emitARL(insn, 0);
1946      else
1947      if (insn->def(0).getFile() == FILE_FLAGS ||
1948          insn->src(0).getFile() == FILE_FLAGS ||
1949          insn->src(0).getFile() == FILE_ADDRESS)
1950         emitMOV(insn);
1951      else
1952         emitCVT(insn);
1953      break;
1954   case OP_RCP:
1955      emitSFnOp(insn, 0);
1956      break;
1957   case OP_RSQ:
1958      emitSFnOp(insn, 2);
1959      break;
1960   case OP_LG2:
1961      emitSFnOp(insn, 3);
1962      break;
1963   case OP_SIN:
1964      emitSFnOp(insn, 4);
1965      break;
1966   case OP_COS:
1967      emitSFnOp(insn, 5);
1968      break;
1969   case OP_EX2:
1970      emitSFnOp(insn, 6);
1971      break;
1972   case OP_PRESIN:
1973   case OP_PREEX2:
1974      emitPreOp(insn);
1975      break;
1976   case OP_TEX:
1977   case OP_TXB:
1978   case OP_TXL:
1979   case OP_TXF:
1980   case OP_TXG:
1981   case OP_TXLQ:
1982      emitTEX(insn->asTex());
1983      break;
1984   case OP_TXQ:
1985      emitTXQ(insn->asTex());
1986      break;
1987   case OP_TEXPREP:
1988      emitTEXPREP(insn->asTex());
1989      break;
1990   case OP_EMIT:
1991   case OP_RESTART:
1992      emitOUT(insn);
1993      break;
1994   case OP_DISCARD:
1995      emitFlow(insn, 0x0);
1996      break;
1997   case OP_BRA:
1998      emitFlow(insn, 0x1);
1999      break;
2000   case OP_CALL:
2001      emitFlow(insn, 0x2);
2002      break;
2003   case OP_RET:
2004      emitFlow(insn, 0x3);
2005      break;
2006   case OP_PREBREAK:
2007      emitFlow(insn, 0x4);
2008      break;
2009   case OP_BREAK:
2010      emitFlow(insn, 0x5);
2011      break;
2012   case OP_QUADON:
2013      emitFlow(insn, 0x6);
2014      break;
2015   case OP_QUADPOP:
2016      emitFlow(insn, 0x7);
2017      break;
2018   case OP_JOINAT:
2019      emitFlow(insn, 0xa);
2020      break;
2021   case OP_PRERET:
2022      emitFlow(insn, 0xd);
2023      break;
2024   case OP_QUADOP:
2025      emitQUADOP(insn, insn->lanes, insn->subOp);
2026      break;
2027   case OP_DFDX:
2028      emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
2029      break;
2030   case OP_DFDY:
2031      emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
2032      break;
2033   case OP_ATOM:
2034      emitATOM(insn);
2035      break;
2036   case OP_BAR:
2037      emitBAR(insn);
2038      break;
2039   case OP_PHI:
2040   case OP_UNION:
2041   case OP_CONSTRAINT:
2042      ERROR("operation should have been eliminated\n");
2043      return false;
2044   case OP_EXP:
2045   case OP_LOG:
2046   case OP_SQRT:
2047   case OP_POW:
2048   case OP_SELP:
2049   case OP_SLCT:
2050   case OP_TXD:
2051   case OP_PRECONT:
2052   case OP_CONT:
2053   case OP_POPCNT:
2054   case OP_INSBF:
2055   case OP_EXTBF:
2056      ERROR("operation should have been lowered\n");
2057      return false;
2058   default:
2059      ERROR("unknown op: %u\n", insn->op);
2060      return false;
2061   }
2062   if (insn->join || insn->op == OP_JOIN)
2063      code[1] |= 0x2;
2064   else
2065   if (insn->exit || insn->op == OP_EXIT)
2066      code[1] |= 0x1;
2067
2068   assert((insn->encSize == 8) == (code[0] & 1));
2069
2070   code += insn->encSize / 4;
2071   codeSize += insn->encSize;
2072   return true;
2073}
2074
2075uint32_t
2076CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2077{
2078   const Target::OpInfo &info = targ->getOpInfo(i);
2079
2080   if (info.minEncSize > 4 || i->dType == TYPE_F64)
2081      return 8;
2082
2083   // check constraints on dst and src operands
2084   for (int d = 0; i->defExists(d); ++d) {
2085      if (i->def(d).rep()->reg.data.id > 63 ||
2086          i->def(d).rep()->reg.file != FILE_GPR)
2087         return 8;
2088   }
2089
2090   for (int s = 0; i->srcExists(s); ++s) {
2091      DataFile sf = i->src(s).getFile();
2092      if (sf != FILE_GPR)
2093         if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2094            return 8;
2095      if (i->src(s).rep()->reg.data.id > 63)
2096         return 8;
2097   }
2098
2099   // check modifiers & rounding
2100   if (i->join || i->lanes != 0xf || i->exit)
2101      return 8;
2102   if (i->op == OP_MUL && i->rnd != ROUND_N)
2103      return 8;
2104
2105   if (i->asTex())
2106      return 8; // TODO: short tex encoding
2107
2108   // check constraints on short MAD
2109   if (info.srcNr >= 2 && i->srcExists(2)) {
2110      if (!i->defExists(0) ||
2111          (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
2112          DDATA(i->def(0)).id != SDATA(i->src(2)).id)
2113         return 8;
2114   }
2115
2116   return info.minEncSize;
2117}
2118
2119// Change the encoding size of an instruction after BBs have been scheduled.
2120static void
2121makeInstructionLong(Instruction *insn)
2122{
2123   if (insn->encSize == 8)
2124      return;
2125   Function *fn = insn->bb->getFunction();
2126   int n = 0;
2127   int adj = 4;
2128
2129   for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
2130
2131   if (n & 1) {
2132      adj = 8;
2133      insn->next->encSize = 8;
2134   } else
2135   if (insn->prev && insn->prev->encSize == 4) {
2136      adj = 8;
2137      insn->prev->encSize = 8;
2138   }
2139   insn->encSize = 8;
2140
2141   for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2142      fn->bbArray[i]->binPos += adj;
2143   }
2144   fn->binSize += adj;
2145   insn->bb->binSize += adj;
2146}
2147
2148static bool
2149trySetExitModifier(Instruction *insn)
2150{
2151   if (insn->op == OP_DISCARD ||
2152       insn->op == OP_QUADON ||
2153       insn->op == OP_QUADPOP)
2154      return false;
2155   for (int s = 0; insn->srcExists(s); ++s)
2156      if (insn->src(s).getFile() == FILE_IMMEDIATE)
2157         return false;
2158   if (insn->asFlow()) {
2159      if (insn->op == OP_CALL) // side effects !
2160         return false;
2161      if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
2162         return false;
2163      insn->op = OP_EXIT;
2164   }
2165   insn->exit = 1;
2166   makeInstructionLong(insn);
2167   return true;
2168}
2169
2170static void
2171replaceExitWithModifier(Function *func)
2172{
2173   BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2174
2175   if (!epilogue->getExit() ||
2176       epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2177      return;
2178
2179   if (epilogue->getEntry()->op != OP_EXIT) {
2180      Instruction *insn = epilogue->getExit()->prev;
2181      if (!insn || !trySetExitModifier(insn))
2182         return;
2183      insn->exit = 1;
2184   } else {
2185      for (Graph::EdgeIterator ei = func->cfgExit->incident();
2186           !ei.end(); ei.next()) {
2187         BasicBlock *bb = BasicBlock::get(ei.getNode());
2188         Instruction *i = bb->getExit();
2189
2190         if (!i || !trySetExitModifier(i))
2191            return;
2192      }
2193   }
2194
2195   int adj = epilogue->getExit()->encSize;
2196   epilogue->binSize -= adj;
2197   func->binSize -= adj;
2198   delete_Instruction(func->getProgram(), epilogue->getExit());
2199
2200   // There may be BB's that are laid out after the exit block
2201   for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
2202      func->bbArray[i]->binPos -= adj;
2203   }
2204}
2205
2206void
2207CodeEmitterNV50::prepareEmission(Function *func)
2208{
2209   CodeEmitter::prepareEmission(func);
2210
2211   replaceExitWithModifier(func);
2212}
2213
2214CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
2215   CodeEmitter(target), targNV50(target)
2216{
2217   targ = target; // specialized
2218   code = NULL;
2219   codeSize = codeSizeLimit = 0;
2220   relocInfo = NULL;
2221}
2222
2223CodeEmitter *
2224TargetNV50::getCodeEmitter(Program::Type type)
2225{
2226   CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
2227   emit->setProgramType(type);
2228   return emit;
2229}
2230
2231} // namespace nv50_ir
2232