1
2#include "nv50_ir.h"
3#include "nv50_ir_target.h"
4#include "nv50_ir_build_util.h"
5
6#include "nv50_ir_from_sm4.h"
7
// XXX: is pass-through of inputs/outputs implicit? Check ReadWriteMask.
9
10namespace tgsi {
11
12static nv50_ir::SVSemantic irSemantic(unsigned sn)
13{
14   switch (sn) {
15   case TGSI_SEMANTIC_POSITION:      return nv50_ir::SV_POSITION;
16   case TGSI_SEMANTIC_FACE:          return nv50_ir::SV_FACE;
17   case NV50_SEMANTIC_LAYER:         return nv50_ir::SV_LAYER;
18   case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX;
19   case TGSI_SEMANTIC_PSIZE:         return nv50_ir::SV_POINT_SIZE;
20   case NV50_SEMANTIC_CLIPDISTANCE:  return nv50_ir::SV_CLIP_DISTANCE;
21   case TGSI_SEMANTIC_VERTEXID:      return nv50_ir::SV_VERTEX_ID;
22   case TGSI_SEMANTIC_INSTANCEID:    return nv50_ir::SV_INSTANCE_ID;
23   case TGSI_SEMANTIC_PRIMID:        return nv50_ir::SV_PRIMITIVE_ID;
24   case NV50_SEMANTIC_TESSFACTOR:    return nv50_ir::SV_TESS_FACTOR;
25   case NV50_SEMANTIC_TESSCOORD:     return nv50_ir::SV_TESS_COORD;
26   default:
27      return nv50_ir::SV_UNDEFINED;
28   }
29}
30
31} // namespace tgsi
32
33namespace {
34
35using namespace nv50_ir;
36
37#define NV50_IR_MAX_RESOURCES 64
38
39class Converter : public BuildUtil
40{
41public:
42   Converter(Program *, struct nv50_ir_prog_info *);
43   ~Converter();
44
45private:
46   DataArray tData32;
47   DataArray tData64;
48   unsigned int nrRegVals;
49
50   DataArray *lData;
51   unsigned int nrArrays;
52   unsigned int arrayVol;
53
54   DataArray oData;
55
56   uint8_t interpMode[PIPE_MAX_SHADER_INPUTS];
57
58   // outputs for each phase
59   struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS];
60
61   int phase;
62   int subPhaseCnt[2];
63   int subPhase;
64   unsigned int phaseStart;
65   unsigned int phaseInstance;
66   unsigned int *phaseInstCnt[2];
67   bool unrollPhase;
68   bool phaseInstanceUsed;
69   int phaseEnded; // (phase + 1) if $phase ended
70
71   bool finalized;
72
73   Value *srcPtr[3][3]; // for indirect addressing, save pointer values
74   Value *dstPtr[3];
75   Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP)
76
77   Value *domainPt[3]; // pre-fetched TessCoord
78
79   unsigned int nDstOpnds;
80
81   Stack condBBs;
82   Stack joinBBs;
83   Stack loopBBs;
84   Stack breakBBs;
85   Stack entryBBs;
86   Stack leaveBBs;
87   Stack retIPs;
88
89   bool shadow[NV50_IR_MAX_RESOURCES];
90   TexTarget resourceType[NV50_IR_MAX_RESOURCES][2];
91
92   struct nv50_ir_prog_info& info;
93
94   Value *fragCoord[4];
95
96public:
97   bool run();
98
99private:
100   bool handleInstruction(unsigned int pos);
101   bool inspectInstruction(unsigned int pos);
102   bool handleDeclaration(const sm4_dcl& dcl);
103   bool inspectDeclaration(const sm4_dcl& dcl);
104   bool parseSignature();
105
106   bool haveNextPhase(unsigned int pos) const;
107
108   void allocateValues();
109   void exportOutputs();
110
111   void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]);
112   void handleLOAD(Value *dst0[4]);
113   void handleSAMPLE(operation, Value *dst0[4]);
114   void handleQUERY(Value *dst0[4], enum TexQuery query);
115   void handleDP(Value *dst0[4], int dim);
116
117   Symbol *iSym(int i, int c);
118   Symbol *oSym(int i, int c);
119
120   Value *src(int i, int c);
121   Value *src(const sm4_op&, int c, int i);
122   Value *dst(int i, int c);
123   Value *dst(const sm4_op&, int c, int i);
124   void saveDst(int i, int c, Value *value);
125   void saveDst(const sm4_op&, int c, Value *value, int i);
126   void saveFragDepth(operation op, Value *value);
127
128   Value *interpolate(const sm4_op&, int c, int i);
129
130   Value *getSrcPtr(int s, int dim, int shl);
131   Value *getDstPtr(int d, int dim, int shl);
132   Value *getVtxPtr(int s);
133
134   bool checkDstSrcAliasing() const;
135   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
136   void finalizeShader();
137
138   operation cvtOpcode(enum sm4_opcode op) const;
139   unsigned int getDstOpndCount(enum sm4_opcode opcode) const;
140
141   DataType inferSrcType(enum sm4_opcode op) const;
142   DataType inferDstType(enum sm4_opcode op) const;
143
144   unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const;
145   CondCode cvtCondCode(enum sm4_opcode op) const;
146   RoundMode cvtRoundingMode(enum sm4_opcode op) const;
147   TexTarget cvtTexTarget(enum sm4_target,
148                           enum sm4_opcode, operation *) const;
149   SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const;
150   uint8_t cvtInterpMode(enum sm4_interpolation) const;
151
152   unsigned tgsiSemantic(SVSemantic, int index);
153   void recordSV(unsigned sn, unsigned si, unsigned mask, bool input);
154
155private:
156   sm4_insn *insn;
157   DataType dTy, sTy;
158
159   const struct sm4_program& sm4;
160   Program *prog;
161};
162
163#define PRIM_CASE(a, b) \
164   case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b;
165
166unsigned
167Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const
168{
169   switch (prim) {
170   PRIM_CASE(UNDEFINED, POINTS);
171   PRIM_CASE(POINTLIST, POINTS);
172   PRIM_CASE(LINELIST, LINES);
173   PRIM_CASE(LINESTRIP, LINE_STRIP);
174   PRIM_CASE(TRIANGLELIST, TRIANGLES);
175   PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP);
176   PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY);
177   PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY);
178   PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY);
179   PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLES_ADJACENCY);
180   default:
181      if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST ||
182          prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST)
183         return PIPE_PRIM_POINTS;
184      if (patchSize)
185         *patchSize =
186            prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1;
187      return NV50_PRIM_PATCHES;
188   }
189}
190
191#define IPM_CASE(n, a, b) \
192   case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b
193
194uint8_t
195Converter::cvtInterpMode(enum sm4_interpolation mode) const
196{
197   switch (mode) {
198   IPM_CASE(CONSTANT,                      FLAT, FLAT);
199   IPM_CASE(LINEAR,                        PERSPECTIVE, PERSPECTIVE);
200   IPM_CASE(LINEAR_CENTROID,               PERSPECTIVE, CENTROID);
201   IPM_CASE(LINEAR_NOPERSPECTIVE,          LINEAR, LINEAR);
202   IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID, LINEAR, CENTROID);
203   IPM_CASE(LINEAR_SAMPLE,                 PERSPECTIVE, OFFSET);
204   IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE,   LINEAR, OFFSET);
205   IPM_CASE(UNDEFINED,                     LINEAR, LINEAR);
206   default:
207      assert(!"invalid interpolation mode");
208      return 0;
209   }
210}
211
212static void
213setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode)
214{
215   switch (mode & NV50_IR_INTERP_MODE_MASK) {
216   case NV50_IR_INTERP_LINEAR:
217      var->linear = 1;
218      break;
219   case NV50_IR_INTERP_FLAT:
220      var->flat = 1;
221      break;
222   default:
223      break;
224   }
225   if (mode & NV50_IR_INTERP_CENTROID)
226      var->centroid = 1;
227}
228
229RoundMode
230Converter::cvtRoundingMode(enum sm4_opcode op) const
231{
232   switch (op) {
233   case SM4_OPCODE_ROUND_NE: return ROUND_NI;
234   case SM4_OPCODE_ROUND_NI: return ROUND_MI;
235   case SM4_OPCODE_ROUND_PI: return ROUND_PI;
236   case SM4_OPCODE_ROUND_Z:  return ROUND_ZI;
237   default:
238      return ROUND_N;
239   }
240}
241
242CondCode
243Converter::cvtCondCode(enum sm4_opcode op) const
244{
245   switch (op) {
246   case SM4_OPCODE_EQ:
247   case SM4_OPCODE_DEQ:
248   case SM4_OPCODE_IEQ: return CC_EQ;
249   case SM4_OPCODE_GE:
250   case SM4_OPCODE_DGE:
251   case SM4_OPCODE_IGE:
252   case SM4_OPCODE_UGE: return CC_GE;
253   case SM4_OPCODE_LT:
254   case SM4_OPCODE_DLT:
255   case SM4_OPCODE_ILT:
256   case SM4_OPCODE_ULT: return CC_LT;
257   case SM4_OPCODE_NE:
258   case SM4_OPCODE_INE:
259   case SM4_OPCODE_DNE: return CC_NEU;
260   default:
261      return CC_ALWAYS;
262   }
263}
264
265DataType
266Converter::inferSrcType(enum sm4_opcode op) const
267{
268   switch (op) {
269   case SM4_OPCODE_IADD:
270   case SM4_OPCODE_IEQ:
271   case SM4_OPCODE_IGE:
272   case SM4_OPCODE_ILT:
273   case SM4_OPCODE_IMAD:
274   case SM4_OPCODE_IMAX:
275   case SM4_OPCODE_IMIN:
276   case SM4_OPCODE_IMUL:
277   case SM4_OPCODE_INE:
278   case SM4_OPCODE_INEG:
279   case SM4_OPCODE_ISHL:
280   case SM4_OPCODE_ISHR:
281   case SM4_OPCODE_ITOF:
282   case SM4_OPCODE_ATOMIC_IADD:
283   case SM4_OPCODE_ATOMIC_IMAX:
284   case SM4_OPCODE_ATOMIC_IMIN:
285      return TYPE_S32;
286   case SM4_OPCODE_AND:
287   case SM4_OPCODE_NOT:
288   case SM4_OPCODE_OR:
289   case SM4_OPCODE_UDIV:
290   case SM4_OPCODE_ULT:
291   case SM4_OPCODE_UGE:
292   case SM4_OPCODE_UMUL:
293   case SM4_OPCODE_UMAD:
294   case SM4_OPCODE_UMAX:
295   case SM4_OPCODE_UMIN:
296   case SM4_OPCODE_USHR:
297   case SM4_OPCODE_UTOF:
298   case SM4_OPCODE_XOR:
299   case SM4_OPCODE_UADDC:
300   case SM4_OPCODE_USUBB:
301   case SM4_OPCODE_ATOMIC_AND:
302   case SM4_OPCODE_ATOMIC_OR:
303   case SM4_OPCODE_ATOMIC_XOR:
304   case SM4_OPCODE_ATOMIC_UMAX:
305   case SM4_OPCODE_ATOMIC_UMIN:
306      return TYPE_U32;
307   case SM4_OPCODE_DADD:
308   case SM4_OPCODE_DMAX:
309   case SM4_OPCODE_DMIN:
310   case SM4_OPCODE_DMUL:
311   case SM4_OPCODE_DEQ:
312   case SM4_OPCODE_DGE:
313   case SM4_OPCODE_DLT:
314   case SM4_OPCODE_DNE:
315   case SM4_OPCODE_DMOV:
316   case SM4_OPCODE_DMOVC:
317   case SM4_OPCODE_DTOF:
318      return TYPE_F64;
319   case SM4_OPCODE_F16TOF32:
320      return TYPE_F16;
321   default:
322      return TYPE_F32;
323   }
324}
325
326DataType
327Converter::inferDstType(enum sm4_opcode op) const
328{
329   switch (op) {
330   case SM4_OPCODE_FTOI:
331      return TYPE_S32;
332   case SM4_OPCODE_FTOU:
333   case SM4_OPCODE_EQ:
334   case SM4_OPCODE_GE:
335   case SM4_OPCODE_LT:
336   case SM4_OPCODE_NE:
337      return TYPE_U32;
338   case SM4_OPCODE_FTOD:
339      return TYPE_F64;
340   case SM4_OPCODE_F32TOF16:
341      return TYPE_F16;
342   case SM4_OPCODE_ITOF:
343   case SM4_OPCODE_UTOF:
344   case SM4_OPCODE_DTOF:
345      return TYPE_F32;
346   default:
347      return inferSrcType(op);
348   }
349}
350
351operation
352Converter::cvtOpcode(enum sm4_opcode op) const
353{
354   switch (op) {
355   case SM4_OPCODE_ADD:         return OP_ADD;
356   case SM4_OPCODE_AND:         return OP_AND;
357   case SM4_OPCODE_BREAK:       return OP_BREAK;
358   case SM4_OPCODE_BREAKC:      return OP_BREAK;
359   case SM4_OPCODE_CALL:        return OP_CALL;
360   case SM4_OPCODE_CALLC:       return OP_CALL;
361   case SM4_OPCODE_CASE:        return OP_NOP;
362   case SM4_OPCODE_CONTINUE:    return OP_CONT;
363   case SM4_OPCODE_CONTINUEC:   return OP_CONT;
364   case SM4_OPCODE_CUT:         return OP_RESTART;
365   case SM4_OPCODE_DEFAULT:     return OP_NOP;
366   case SM4_OPCODE_DERIV_RTX:   return OP_DFDX;
367   case SM4_OPCODE_DERIV_RTY:   return OP_DFDY;
368   case SM4_OPCODE_DISCARD:     return OP_DISCARD;
369   case SM4_OPCODE_DIV:         return OP_DIV;
370   case SM4_OPCODE_DP2:         return OP_MAD;
371   case SM4_OPCODE_DP3:         return OP_MAD;
372   case SM4_OPCODE_DP4:         return OP_MAD;
373   case SM4_OPCODE_ELSE:        return OP_BRA;
374   case SM4_OPCODE_EMIT:        return OP_EMIT;
375   case SM4_OPCODE_EMITTHENCUT: return OP_EMIT;
376   case SM4_OPCODE_ENDIF:       return OP_BRA;
377   case SM4_OPCODE_ENDLOOP:     return OP_PREBREAK;
378   case SM4_OPCODE_ENDSWITCH:   return OP_NOP;
379   case SM4_OPCODE_EQ:          return OP_SET;
380   case SM4_OPCODE_EXP:         return OP_EX2;
381   case SM4_OPCODE_FRC:         return OP_CVT;
382   case SM4_OPCODE_FTOI:        return OP_CVT;
383   case SM4_OPCODE_FTOU:        return OP_CVT;
384   case SM4_OPCODE_GE:          return OP_SET;
385   case SM4_OPCODE_IADD:        return OP_ADD;
386   case SM4_OPCODE_IF:          return OP_BRA;
387   case SM4_OPCODE_IEQ:         return OP_SET;
388   case SM4_OPCODE_IGE:         return OP_SET;
389   case SM4_OPCODE_ILT:         return OP_SET;
390   case SM4_OPCODE_IMAD:        return OP_MAD;
391   case SM4_OPCODE_IMAX:        return OP_MAX;
392   case SM4_OPCODE_IMIN:        return OP_MIN;
393   case SM4_OPCODE_IMUL:        return OP_MUL;
394   case SM4_OPCODE_INE:         return OP_SET;
395   case SM4_OPCODE_INEG:        return OP_NEG;
396   case SM4_OPCODE_ISHL:        return OP_SHL;
397   case SM4_OPCODE_ISHR:        return OP_SHR;
398   case SM4_OPCODE_ITOF:        return OP_CVT;
399   case SM4_OPCODE_LD:          return OP_TXF;
400   case SM4_OPCODE_LD_MS:       return OP_TXF;
401   case SM4_OPCODE_LOG:         return OP_LG2;
402   case SM4_OPCODE_LOOP:        return OP_PRECONT;
403   case SM4_OPCODE_LT:          return OP_SET;
404   case SM4_OPCODE_MAD:         return OP_MAD;
405   case SM4_OPCODE_MIN:         return OP_MIN;
406   case SM4_OPCODE_MAX:         return OP_MAX;
407   case SM4_OPCODE_MOV:         return OP_MOV;
408   case SM4_OPCODE_MOVC:        return OP_MOV;
409   case SM4_OPCODE_MUL:         return OP_MUL;
410   case SM4_OPCODE_NE:          return OP_SET;
411   case SM4_OPCODE_NOP:         return OP_NOP;
412   case SM4_OPCODE_NOT:         return OP_NOT;
413   case SM4_OPCODE_OR:          return OP_OR;
414   case SM4_OPCODE_RESINFO:     return OP_TXQ;
415   case SM4_OPCODE_RET:         return OP_RET;
416   case SM4_OPCODE_RETC:        return OP_RET;
417   case SM4_OPCODE_ROUND_NE:    return OP_CVT;
418   case SM4_OPCODE_ROUND_NI:    return OP_FLOOR;
419   case SM4_OPCODE_ROUND_PI:    return OP_CEIL;
420   case SM4_OPCODE_ROUND_Z:     return OP_TRUNC;
421   case SM4_OPCODE_RSQ:         return OP_RSQ;
422   case SM4_OPCODE_SAMPLE:      return OP_TEX;
423   case SM4_OPCODE_SAMPLE_C:    return OP_TEX;
424   case SM4_OPCODE_SAMPLE_C_LZ: return OP_TEX;
425   case SM4_OPCODE_SAMPLE_L:    return OP_TXL;
426   case SM4_OPCODE_SAMPLE_D:    return OP_TXD;
427   case SM4_OPCODE_SAMPLE_B:    return OP_TXB;
428   case SM4_OPCODE_SQRT:        return OP_SQRT;
429   case SM4_OPCODE_SWITCH:      return OP_NOP;
430   case SM4_OPCODE_SINCOS:      return OP_PRESIN;
431   case SM4_OPCODE_UDIV:        return OP_DIV;
432   case SM4_OPCODE_ULT:         return OP_SET;
433   case SM4_OPCODE_UGE:         return OP_SET;
434   case SM4_OPCODE_UMUL:        return OP_MUL;
435   case SM4_OPCODE_UMAD:        return OP_MAD;
436   case SM4_OPCODE_UMAX:        return OP_MAX;
437   case SM4_OPCODE_UMIN:        return OP_MIN;
438   case SM4_OPCODE_USHR:        return OP_SHR;
439   case SM4_OPCODE_UTOF:        return OP_CVT;
440   case SM4_OPCODE_XOR:         return OP_XOR;
441
442   case SM4_OPCODE_GATHER4:            return OP_TXG;
443   case SM4_OPCODE_SAMPLE_POS:         return OP_PIXLD;
444   case SM4_OPCODE_SAMPLE_INFO:        return OP_PIXLD;
445   case SM4_OPCODE_EMIT_STREAM:        return OP_EMIT;
446   case SM4_OPCODE_CUT_STREAM:         return OP_RESTART;
447   case SM4_OPCODE_EMITTHENCUT_STREAM: return OP_EMIT;
448   case SM4_OPCODE_INTERFACE_CALL:     return OP_CALL;
449   case SM4_OPCODE_BUFINFO:            return OP_TXQ;
450   case SM4_OPCODE_DERIV_RTX_COARSE:   return OP_DFDX;
451   case SM4_OPCODE_DERIV_RTX_FINE:     return OP_DFDX;
452   case SM4_OPCODE_DERIV_RTY_COARSE:   return OP_DFDY;
453   case SM4_OPCODE_DERIV_RTY_FINE:     return OP_DFDY;
454   case SM4_OPCODE_GATHER4_C:          return OP_TXG;
455   case SM4_OPCODE_GATHER4_PO:         return OP_TXG;
456   case SM4_OPCODE_GATHER4_PO_C:       return OP_TXG;
457
458   case SM4_OPCODE_RCP:       return OP_RCP;
459   case SM4_OPCODE_F32TOF16:  return OP_CVT;
460   case SM4_OPCODE_F16TOF32:  return OP_CVT;
461   case SM4_OPCODE_UADDC:     return OP_ADD;
462   case SM4_OPCODE_USUBB:     return OP_SUB;
463   case SM4_OPCODE_COUNTBITS: return OP_POPCNT;
464
465   case SM4_OPCODE_ATOMIC_AND:       return OP_AND;
466   case SM4_OPCODE_ATOMIC_OR:        return OP_OR;
467   case SM4_OPCODE_ATOMIC_XOR:       return OP_XOR;
468   case SM4_OPCODE_ATOMIC_CMP_STORE: return OP_STORE;
469   case SM4_OPCODE_ATOMIC_IADD:      return OP_ADD;
470   case SM4_OPCODE_ATOMIC_IMAX:      return OP_MAX;
471   case SM4_OPCODE_ATOMIC_IMIN:      return OP_MIN;
472   case SM4_OPCODE_ATOMIC_UMAX:      return OP_MAX;
473   case SM4_OPCODE_ATOMIC_UMIN:      return OP_MIN;
474
475   case SM4_OPCODE_SYNC:  return OP_MEMBAR;
476   case SM4_OPCODE_DADD:  return OP_ADD;
477   case SM4_OPCODE_DMAX:  return OP_MAX;
478   case SM4_OPCODE_DMIN:  return OP_MIN;
479   case SM4_OPCODE_DMUL:  return OP_MUL;
480   case SM4_OPCODE_DEQ:   return OP_SET;
481   case SM4_OPCODE_DGE:   return OP_SET;
482   case SM4_OPCODE_DLT:   return OP_SET;
483   case SM4_OPCODE_DNE:   return OP_SET;
484   case SM4_OPCODE_DMOV:  return OP_MOV;
485   case SM4_OPCODE_DMOVC: return OP_MOV;
486   case SM4_OPCODE_DTOF:  return OP_CVT;
487   case SM4_OPCODE_FTOD:  return OP_CVT;
488
489   default:
490      return OP_NOP;
491   }
492}
493
494unsigned int
495Converter::getDstOpndCount(enum sm4_opcode opcode) const
496{
497   switch (opcode) {
498   case SM4_OPCODE_SINCOS:
499   case SM4_OPCODE_UDIV:
500   case SM4_OPCODE_IMUL:
501   case SM4_OPCODE_UMUL:
502      return 2;
503   case SM4_OPCODE_BREAK:
504   case SM4_OPCODE_BREAKC:
505   case SM4_OPCODE_CALL:
506   case SM4_OPCODE_CALLC:
507   case SM4_OPCODE_CONTINUE:
508   case SM4_OPCODE_CONTINUEC:
509   case SM4_OPCODE_DISCARD:
510   case SM4_OPCODE_EMIT:
511   case SM4_OPCODE_EMIT_STREAM:
512   case SM4_OPCODE_CUT:
513   case SM4_OPCODE_CUT_STREAM:
514   case SM4_OPCODE_EMITTHENCUT:
515   case SM4_OPCODE_EMITTHENCUT_STREAM:
516   case SM4_OPCODE_IF:
517   case SM4_OPCODE_ELSE:
518   case SM4_OPCODE_ENDIF:
519   case SM4_OPCODE_LOOP:
520   case SM4_OPCODE_ENDLOOP:
521   case SM4_OPCODE_RET:
522   case SM4_OPCODE_RETC:
523   case SM4_OPCODE_SYNC:
524   case SM4_OPCODE_SWITCH:
525   case SM4_OPCODE_CASE:
526   case SM4_OPCODE_HS_DECLS:
527   case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
528   case SM4_OPCODE_HS_FORK_PHASE:
529   case SM4_OPCODE_HS_JOIN_PHASE:
530      return 0;
531   default:
532      return 1;
533   }
534}
535
536#define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b;
537#define TARG_CASE_2(a, b) case SM4_TARGET_##a: \
538   return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b
539
540TexTarget
541Converter::cvtTexTarget(enum sm4_target targ,
542                        enum sm4_opcode op, operation *opr) const
543{
544   bool dc = (op == SM4_OPCODE_SAMPLE_C ||
545              op == SM4_OPCODE_SAMPLE_C_LZ ||
546              op == SM4_OPCODE_GATHER4_C ||
547              op == SM4_OPCODE_GATHER4_PO_C);
548
549   if (opr) {
550      switch (targ) {
551      case SM4_TARGET_RAW_BUFFER:        *opr = OP_LOAD; break;
552      case SM4_TARGET_STRUCTURED_BUFFER: *opr = OP_SULD; break;
553      default:
554         *opr = OP_TEX;
555         break;
556      }
557   }
558
559   switch (targ) {
560   TARG_CASE_1(UNKNOWN, 2D);
561   TARG_CASE_2(TEXTURE1D,         1D);
562   TARG_CASE_2(TEXTURE2D,         2D);
563   TARG_CASE_1(TEXTURE2DMS,       2D_MS);
564   TARG_CASE_1(TEXTURE3D,         3D);
565   TARG_CASE_2(TEXTURECUBE,       CUBE);
566   TARG_CASE_2(TEXTURE1DARRAY,    1D_ARRAY);
567   TARG_CASE_2(TEXTURE2DARRAY,    2D_ARRAY);
568   TARG_CASE_1(TEXTURE2DMSARRAY,  2D_MS_ARRAY);
569   TARG_CASE_2(TEXTURECUBEARRAY,  CUBE_ARRAY);
570   TARG_CASE_1(BUFFER,            BUFFER);
571   TARG_CASE_1(RAW_BUFFER,        BUFFER);
572   TARG_CASE_1(STRUCTURED_BUFFER, BUFFER);
573   default:
574      assert(!"invalid SM4 texture target");
575      return dc ? TEX_TARGET_2D_SHADOW : TEX_TARGET_2D;
576   }
577}
578
579static inline uint32_t
580getSVIndex(enum sm4_sv sv)
581{
582   switch (sv) {
583   case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return 0;
584   case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return 1;
585   case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return 2;
586   case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return 3;
587
588   case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: return 4;
589   case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: return 5;
590
591   case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return 0;
592   case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return 1;
593   case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return 2;
594
595   case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: return 4;
596
597   case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: return 0;
598
599   case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: return 4;
600
601   default:
602      return 0;
603   }
604}
605
606SVSemantic
607Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const
608{
609   idx = 0;
610
611   switch (sv) {
612   case SM4_SV_UNDEFINED:     return SV_UNDEFINED;
613   case SM4_SV_POSITION:      return SV_POSITION;
614   case SM4_SV_CLIP_DISTANCE: return SV_CLIP_DISTANCE;
615   case SM4_SV_CULL_DISTANCE: return SV_CLIP_DISTANCE; // XXX: distinction
616   case SM4_SV_RENDER_TARGET_ARRAY_INDEX: return SV_LAYER;
617   case SM4_SV_VIEWPORT_ARRAY_INDEX:  return SV_VIEWPORT_INDEX;
618   case SM4_SV_VERTEX_ID:     return SV_VERTEX_ID;
619   case SM4_SV_PRIMITIVE_ID:  return SV_PRIMITIVE_ID;
620   case SM4_SV_INSTANCE_ID:   return SV_INSTANCE_ID;
621   case SM4_SV_IS_FRONT_FACE: return SV_FACE;
622   case SM4_SV_SAMPLE_INDEX:  return SV_SAMPLE_INDEX;
623
624   case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
625   case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
626   case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
627   case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
628   case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR:
629   case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR:
630   case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
631   case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
632   case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
633   case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR:
634   case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR:
635   case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR:
636      idx = getSVIndex(sv);
637      return SV_TESS_FACTOR;
638
639   default:
640      assert(!"invalid SM4 system value");
641      return SV_UNDEFINED;
642   }
643}
644
645unsigned
646Converter::tgsiSemantic(SVSemantic sv, int index)
647{
648   switch (sv) {
649   case SV_POSITION:       return TGSI_SEMANTIC_POSITION;
650   case SV_FACE:           return TGSI_SEMANTIC_FACE;
651   case SV_LAYER:          return NV50_SEMANTIC_LAYER;
652   case SV_VIEWPORT_INDEX: return NV50_SEMANTIC_VIEWPORTINDEX;
653   case SV_POINT_SIZE:     return TGSI_SEMANTIC_PSIZE;
654   case SV_CLIP_DISTANCE:  return NV50_SEMANTIC_CLIPDISTANCE;
655   case SV_VERTEX_ID:      return TGSI_SEMANTIC_VERTEXID;
656   case SV_INSTANCE_ID:    return TGSI_SEMANTIC_INSTANCEID;
657   case SV_PRIMITIVE_ID:   return TGSI_SEMANTIC_PRIMID;
658   case SV_TESS_FACTOR:    return NV50_SEMANTIC_TESSFACTOR;
659   case SV_TESS_COORD:     return NV50_SEMANTIC_TESSCOORD;
660   case SV_INVOCATION_ID:  return NV50_SEMANTIC_INVOCATIONID;
661   default:
662      return TGSI_SEMANTIC_GENERIC;
663   }
664}
665
666void
667Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input)
668{
669   unsigned int i;
670   for (i = 0; i < info.numSysVals; ++i)
671      if (info.sv[i].sn == sn &&
672          info.sv[i].si == si)
673         return;
674   info.numSysVals = i + 1;
675   info.sv[i].sn = sn;
676   info.sv[i].si = si;
677   info.sv[i].mask = mask;
678   info.sv[i].input = input ? 1 : 0;
679}
680
681bool
682Converter::parseSignature()
683{
684   struct nv50_ir_varying *patch;
685   unsigned int i, r, n;
686
687   info.numInputs = 0;
688   info.numOutputs = 0;
689   info.numPatchConstants = 0;
690
691   for (n = 0, i = 0; i < sm4.num_params_in; ++i) {
692      r = sm4.params_in[i].Register;
693
694      info.in[r].mask |= sm4.params_in[i].ReadWriteMask;
695      // mask might be uninitialized ...
696      if (!sm4.params_in[i].ReadWriteMask)
697	  info.in[r].mask = 0xf;
698      info.in[r].id = r;
699      if (info.in[r].regular) // already assigned semantic name/index
700         continue;
701      info.in[r].regular = 1;
702      info.in[r].patch = 0;
703
704      info.numInputs = MAX2(info.numInputs, r + 1);
705
706      switch (sm4.params_in[i].SystemValueType) {
707      case D3D_NAME_UNDEFINED:
708         info.in[r].sn = TGSI_SEMANTIC_GENERIC;
709         info.in[r].si = n++;
710         break;
711      case D3D_NAME_POSITION:
712         info.in[r].sn = TGSI_SEMANTIC_POSITION;
713         break;
714      case D3D_NAME_VERTEX_ID:
715         info.in[r].sn = TGSI_SEMANTIC_VERTEXID;
716         break;
717      case D3D_NAME_PRIMITIVE_ID:
718         info.in[r].sn = TGSI_SEMANTIC_PRIMID;
719         // no corresponding output
720         recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
721         break;
722      case D3D_NAME_INSTANCE_ID:
723         info.in[r].sn = TGSI_SEMANTIC_INSTANCEID;
724         break;
725      case D3D_NAME_IS_FRONT_FACE:
726         info.in[r].sn = TGSI_SEMANTIC_FACE;
727         // no corresponding output
728         recordSV(TGSI_SEMANTIC_FACE, 0, 1, true);
729         break;
730      default:
731         assert(!"invalid/unsupported input linkage semantic");
732         break;
733      }
734   }
735
736   for (n = 0, i = 0; i < sm4.num_params_out; ++i) {
737      r = sm4.params_out[i].Register;
738
739      info.out[r].mask |= ~sm4.params_out[i].ReadWriteMask;
740      info.out[r].id = r;
741      if (info.out[r].regular) // already assigned semantic name/index
742         continue;
743      info.out[r].regular = 1;
744      info.out[r].patch = 0;
745
746      info.numOutputs = MAX2(info.numOutputs, r + 1);
747
748      switch (sm4.params_out[i].SystemValueType) {
749      case D3D_NAME_UNDEFINED:
750         if (prog->getType() == Program::TYPE_FRAGMENT) {
751            info.out[r].sn = TGSI_SEMANTIC_COLOR;
752            info.out[r].si = info.prop.fp.numColourResults++;
753         } else {
754            info.out[r].sn = TGSI_SEMANTIC_GENERIC;
755            info.out[r].si = n++;
756         }
757         break;
758      case D3D_NAME_POSITION:
759      case D3D_NAME_DEPTH:
760      case D3D_NAME_DEPTH_GREATER_EQUAL:
761      case D3D_NAME_DEPTH_LESS_EQUAL:
762         info.out[r].sn = TGSI_SEMANTIC_POSITION;
763         info.io.fragDepth = r;
764         break;
765      case D3D_NAME_CULL_DISTANCE:
766      case D3D_NAME_CLIP_DISTANCE:
767         info.out[r].sn = NV50_SEMANTIC_CLIPDISTANCE;
768         info.out[r].si = sm4.params_out[i].SemanticIndex;
769         break;
770      case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
771         info.out[r].sn = NV50_SEMANTIC_LAYER;
772         break;
773      case D3D_NAME_VIEWPORT_ARRAY_INDEX:
774         info.out[r].sn = NV50_SEMANTIC_VIEWPORTINDEX;
775         break;
776      case D3D_NAME_PRIMITIVE_ID:
777         info.out[r].sn = TGSI_SEMANTIC_PRIMID;
778         break;
779      case D3D_NAME_TARGET:
780         info.out[r].sn = TGSI_SEMANTIC_COLOR;
781         info.out[r].si = sm4.params_out[i].SemanticIndex;
782         break;
783      case D3D_NAME_COVERAGE:
784         info.out[r].sn = NV50_SEMANTIC_SAMPLEMASK;
785         info.io.sampleMask = r;
786         break;
787      case D3D_NAME_SAMPLE_INDEX:
788      default:
789         assert(!"invalid/unsupported output linkage semantic");
790         break;
791      }
792   }
793
794   if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
795      patch = &info.in[info.numInputs];
796   else
797      patch = &info.out[info.numOutputs];
798
799   for (n = 0, i = 0; i < sm4.num_params_patch; ++i) {
800      r = sm4.params_patch[i].Register;
801
802      patch[r].mask |= sm4.params_patch[i].Mask;
803      patch[r].id = r;
804      if (patch[r].regular) // already visited
805         continue;
806      patch[r].regular = 1;
807      patch[r].patch = 1;
808
809      info.numPatchConstants = MAX2(info.numPatchConstants, r + 1);
810
811      switch (sm4.params_patch[i].SystemValueType) {
812      case D3D_NAME_UNDEFINED:
813         patch[r].sn = TGSI_SEMANTIC_GENERIC;
814         patch[r].si = n++;
815         break;
816      case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
817      case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
818      case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
819         patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
820         patch[r].si = sm4.params_patch[i].SemanticIndex;
821         break;
822      case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
823      case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
824      case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
825         patch[r].sn = NV50_SEMANTIC_TESSFACTOR;
826         patch[r].si = sm4.params_patch[i].SemanticIndex + 4;
827         break;
828      default:
829         assert(!"invalid patch-constant linkage semantic");
830         break;
831      }
832   }
833   if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
834      info.numInputs += info.numPatchConstants;
835   else
836      info.numOutputs += info.numPatchConstants;
837
838   return true;
839}
840
841bool
842Converter::inspectDeclaration(const sm4_dcl& dcl)
843{
844   int idx = -1;
845   enum sm4_interpolation ipa_mode;
846
847   if (dcl.op.get() && dcl.op->is_index_simple(0))
848      idx = dcl.op->indices[0].disp;
849
850   switch (dcl.opcode) {
851   case SM4_OPCODE_DCL_SAMPLER:
852      assert(idx >= 0);
853      shadow[idx] = dcl.dcl_sampler.shadow;
854      break;
855   case SM4_OPCODE_DCL_RESOURCE:
856   {
857      enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target;
858
859      assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES);
860      resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL);
861      resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL);
862   }
863      break;
864   case SM4_OPCODE_DCL_CONSTANT_BUFFER:
865      // nothing to do
866      break;
867   case SM4_OPCODE_CUSTOMDATA:
868      info.immd.bufSize = dcl.num * 4;
869      info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize);
870      memcpy(info.immd.buf, dcl.data, info.immd.bufSize);
871      break;
872   case SM4_OPCODE_DCL_INDEX_RANGE:
873      // XXX: ?
874      break;
875   case SM4_OPCODE_DCL_INPUT_PS_SGV:
876   case SM4_OPCODE_DCL_INPUT_PS_SIV:
877   case SM4_OPCODE_DCL_INPUT_PS:
878   {
879      assert(idx >= 0 && idx < info.numInputs);
880      ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation;
881      interpMode[idx] = cvtInterpMode(ipa_mode);
882      setVaryingInterpMode(&info.in[idx], interpMode[idx]);
883   }
884      break;
885   case SM4_OPCODE_DCL_INPUT_SGV:
886   case SM4_OPCODE_DCL_INPUT_SIV:
887   case SM4_OPCODE_DCL_INPUT:
888      if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) {
889         idx = info.numInputs++;
890         info.in[idx].sn = NV50_SEMANTIC_TESSCOORD;
891         info.in[idx].mask = dcl.op->mask;
892      }
893      // rest handled in parseSignature
894      break;
895   case SM4_OPCODE_DCL_OUTPUT_SGV:
896   case SM4_OPCODE_DCL_OUTPUT_SIV:
897      switch (dcl.sv) {
898      case SM4_SV_POSITION:
899         assert(prog->getType() != Program::TYPE_FRAGMENT);
900         break;
901      case SM4_SV_CULL_DISTANCE: // XXX: order ?
902         info.io.cullDistanceMask |= 1 << info.io.clipDistanceMask;
903      // fall through
904      case SM4_SV_CLIP_DISTANCE:
905         info.io.clipDistanceMask++; // abuse as count
906         break;
907      default:
908         break;
909      }
910      switch (dcl.op->file) {
911      case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
912      case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
913      case SM4_FILE_OUTPUT_DEPTH:
914         if (info.io.fragDepth < 0xff)
915            break;
916         idx = info.io.fragDepth = info.numOutputs++;
917         info.out[idx].sn = TGSI_SEMANTIC_POSITION;
918         break;
919      case SM4_FILE_OUTPUT_COVERAGE_MASK:
920         if (info.io.sampleMask < 0xff)
921            break;
922         idx = info.io.sampleMask = info.numOutputs++;
923         info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK;
924         break;
925      default:
926         break;
927      }
928      break;
929   case SM4_OPCODE_DCL_OUTPUT:
930      // handled in parseSignature
931      break;
932   case SM4_OPCODE_DCL_TEMPS:
933      nrRegVals += dcl.num;
934      break;
935   case SM4_OPCODE_DCL_INDEXABLE_TEMP:
936      nrArrays++;
937      break;
938   case SM4_OPCODE_DCL_GLOBAL_FLAGS:
939      if (prog->getType() == Program::TYPE_FRAGMENT)
940         info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil;
941      break;
942
943   case SM4_OPCODE_DCL_FUNCTION_BODY:
944      break;
945   case SM4_OPCODE_DCL_FUNCTION_TABLE:
946      break;
947   case SM4_OPCODE_DCL_INTERFACE:
948      break;
949
950      // GP
951   case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
952      info.prop.gp.outputPrim = g3dPrim(
953         dcl.dcl_gs_output_primitive_topology.primitive_topology);
954      break;
955   case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
956      info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive);
957      break;
958   case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
959      info.prop.gp.maxVertices = dcl.num;
960      break;
961   case SM4_OPCODE_DCL_GS_INSTANCE_COUNT:
962      info.prop.gp.instanceCount = dcl.num;
963      break;
964   case SM4_OPCODE_DCL_STREAM:
965      break;
966
967      // TCP/TEP
968   case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
969      info.prop.tp.inputPatchSize =
970         dcl.dcl_input_control_point_count.control_points;
971      break;
972   case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
973      info.prop.tp.outputPatchSize =
974         dcl.dcl_output_control_point_count.control_points;
975      break;
976   case SM4_OPCODE_DCL_TESS_DOMAIN:
977      switch (dcl.dcl_tess_domain.domain) {
978      case D3D_TESSELLATOR_DOMAIN_ISOLINE:
979         info.prop.tp.domain = PIPE_PRIM_LINES;
980         break;
981      case D3D_TESSELLATOR_DOMAIN_TRI:
982         info.prop.tp.domain = PIPE_PRIM_TRIANGLES;
983         break;
984      case D3D_TESSELLATOR_DOMAIN_QUAD:
985         info.prop.tp.domain = PIPE_PRIM_QUADS;
986         break;
987      case D3D_TESSELLATOR_DOMAIN_UNDEFINED:
988      default:
989         info.prop.tp.domain = PIPE_PRIM_MAX;
990         break;
991      }
992      break;
993   case SM4_OPCODE_DCL_TESS_PARTITIONING:
994      switch (dcl.dcl_tess_partitioning.partitioning) {
995      case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
996         info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD;
997         break;
998      case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
999         info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN;
1000         break;
1001      case D3D_TESSELLATOR_PARTITIONING_POW2:
1002         info.prop.tp.partitioning = NV50_TESS_PART_POW2;
1003         break;
1004      case D3D_TESSELLATOR_PARTITIONING_INTEGER:
1005      case D3D_TESSELLATOR_PARTITIONING_UNDEFINED:
1006      default:
1007         info.prop.tp.partitioning = NV50_TESS_PART_INTEGER;
1008         break;
1009      }
1010      break;
1011   case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
1012      switch (dcl.dcl_tess_output_primitive.primitive) {
1013      case D3D_TESSELLATOR_OUTPUT_LINE:
1014         info.prop.tp.outputPrim = PIPE_PRIM_LINES;
1015         break;
1016      case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:
1017         info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
1018         info.prop.tp.winding = +1;
1019         break;
1020      case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
1021         info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
1022         info.prop.tp.winding = -1;
1023         break;
1024      case D3D_TESSELLATOR_OUTPUT_POINT:
1025         info.prop.tp.outputPrim = PIPE_PRIM_POINTS;
1026         break;
1027      case D3D_TESSELLATOR_OUTPUT_UNDEFINED:
1028      default:
1029         info.prop.tp.outputPrim = PIPE_PRIM_MAX;
1030         break;
1031      }
1032      break;
1033
1034   case SM4_OPCODE_HS_FORK_PHASE:
1035      ++subPhaseCnt[0];
1036      phase = 1;
1037      break;
1038   case SM4_OPCODE_HS_JOIN_PHASE:
1039      phase = 2;
1040      ++subPhaseCnt[1];
1041      break;
1042   case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1043   case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1044   case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR:
1045      break;
1046
1047      // weird stuff
1048   case SM4_OPCODE_DCL_THREAD_GROUP:
1049   case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
1050   case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
1051   case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
1052   case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
1053   case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
1054   case SM4_OPCODE_DCL_RESOURCE_RAW:
1055   case SM4_OPCODE_DCL_RESOURCE_STRUCTURED:
1056      ERROR("unhandled declaration\n");
1057      abort();
1058      return false;
1059
1060   default:
1061      assert(!"invalid SM4 declaration");
1062      return false;
1063   }
1064   return true;
1065}
1066
1067void
1068Converter::allocateValues()
1069{
1070   lData = new DataArray[nrArrays];
1071
1072   for (unsigned int i = 0; i < nrArrays; ++i)
1073      lData[i].setParent(this);
1074
1075   tData32.setup(0, nrRegVals, 4, 4, FILE_GPR);
1076   tData64.setup(0, nrRegVals, 2, 8, FILE_GPR);
1077
1078   if (prog->getType() == Program::TYPE_FRAGMENT)
1079      oData.setup(0, info.numOutputs, 4, 4, FILE_GPR);
1080}
1081
1082bool Converter::handleDeclaration(const sm4_dcl& dcl)
1083{
1084   switch (dcl.opcode) {
1085   case SM4_OPCODE_DCL_INDEXABLE_TEMP:
1086      lData[nrArrays++].setup(arrayVol,
1087                              dcl.indexable_temp.num, dcl.indexable_temp.comps,
1088                              4, FILE_MEMORY_LOCAL);
1089      arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4;
1090      break;
1091   case SM4_OPCODE_HS_FORK_PHASE:
1092      if (subPhaseCnt[0])
1093         phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1];
1094      ++subPhaseCnt[0];
1095      break;
1096   case SM4_OPCODE_HS_JOIN_PHASE:
1097      if (subPhaseCnt[1])
1098         phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1];
1099      ++subPhaseCnt[1];
1100      break;
1101   case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
1102      phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num;
1103      break;
1104   case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
1105      phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num;
1106      break;
1107
1108   default:
1109      break; // already handled in inspection
1110   }
1111
1112   return true;
1113}
1114
1115Symbol *
1116Converter::iSym(int i, int c)
1117{
1118   if (info.in[i].regular) {
1119      return mkSymbol(FILE_SHADER_INPUT, 0, sTy, info.in[i].slot[c] * 4);
1120   } else {
1121      return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si);
1122   }
1123}
1124
1125Symbol *
1126Converter::oSym(int i, int c)
1127{
1128   if (info.out[i].regular) {
1129      return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4);
1130   } else {
1131      return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si);
1132   }
1133}
1134
1135Value *
1136Converter::getSrcPtr(int s, int dim, int shl)
1137{
1138   if (srcPtr[s][dim])
1139      return srcPtr[s][dim];
1140
1141   sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get();
1142
1143   if (!op)
1144      return NULL;
1145
1146   Value *index = src(*op, 0, s);
1147
1148   srcPtr[s][dim] = index;
1149   if (shl)
1150      srcPtr[s][dim] = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
1151   return srcPtr[s][dim];
1152}
1153
1154Value *
1155Converter::getDstPtr(int d, int dim, int shl)
1156{
1157   assert(d == 0);
1158   if (dstPtr[dim])
1159      return dstPtr[dim];
1160
1161   sm4_op *op = insn->ops[d]->indices[dim].reg.get();
1162   if (!op)
1163      return NULL;
1164
1165   Value *index = src(*op, 0, d);
1166   if (shl)
1167      index = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl));
1168
1169   return (dstPtr[dim] = index);
1170}
1171
1172Value *
1173Converter::getVtxPtr(int s)
1174{
1175   assert(s < 3);
1176   if (vtxBase[s])
1177      return vtxBase[s];
1178
1179   sm4_op *op = insn->ops[s + nDstOpnds].get();
1180   if (!op)
1181      return NULL;
1182   int idx = op->indices[0].disp;
1183
1184   vtxBase[s] = getSrcPtr(s, 0, 0);
1185   vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), vtxBase[s]);
1186   return vtxBase[s];
1187}
1188
1189Value *
1190Converter::src(int i, int c)
1191{
1192   return src(*insn->ops[i + nDstOpnds], c, i);
1193}
1194
1195Value *
1196Converter::dst(int i, int c)
1197{
1198   return dst(*insn->ops[i], c, i);
1199}
1200
1201void
1202Converter::saveDst(int i, int c, Value *value)
1203{
1204   if (insn->insn.sat)
1205      mkOp1(OP_SAT, dTy, value, value);
1206   return saveDst(*insn->ops[i], c, value, i);
1207}
1208
1209Value *
1210Converter::interpolate(const sm4_op& op, int c, int i)
1211{
1212   int idx = op.indices[0].disp;
1213   int swz = op.swizzle[c];
1214   operation opr =
1215      (info.in[idx].linear || info.in[idx].flat) ? OP_LINTERP : OP_PINTERP;
1216
1217   Value *ptr = getSrcPtr(i, 0, 4);
1218
1219   Instruction *insn = new_Instruction(func, opr, TYPE_F32);
1220
1221   insn->setDef(0, getScratch());
1222   insn->setSrc(0, iSym(idx, swz));
1223   if (opr == OP_PINTERP)
1224      insn->setSrc(1, fragCoord[3]);
1225   if (ptr)
1226      insn->setIndirect(0, 0, ptr);
1227
1228   insn->setInterpolate(interpMode[idx]);
1229
1230   bb->insertTail(insn);
1231   return insn->getDef(0);
1232}
1233
1234Value *
1235Converter::src(const sm4_op& op, int c, int s)
1236{
1237   const int size = typeSizeof(sTy);
1238
1239   Instruction *ld;
1240   Value *res, *ptr, *vtx;
1241   int idx, dim, off;
1242   const int swz = op.swizzle[c];
1243
1244   switch (op.file) {
1245   case SM4_FILE_IMMEDIATE32:
1246      res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32);
1247      break;
1248   case SM4_FILE_IMMEDIATE64:
1249      assert(c < 2);
1250      res = loadImm(NULL, op.imm_values[swz].u64);
1251      break;
1252   case SM4_FILE_TEMP:
1253      assert(op.is_index_simple(0));
1254      idx = op.indices[0].disp;
1255      if (size == 8)
1256         res = tData64.load(idx, swz, NULL);
1257      else
1258         res = tData32.load(idx, swz, NULL);
1259      break;
1260   case SM4_FILE_INPUT:
1261   case SM4_FILE_INPUT_CONTROL_POINT:
1262   case SM4_FILE_INPUT_PATCH_CONSTANT:
1263      if (prog->getType() == Program::TYPE_FRAGMENT)
1264         return interpolate(op, c, s);
1265
1266      idx = 0;
1267      if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
1268         idx = info.numInputs - info.numPatchConstants;
1269
1270      if (op.num_indices == 2) {
1271         vtx = getVtxPtr(s);
1272         ptr = getSrcPtr(s, 1, 4);
1273         idx += op.indices[1].disp;
1274         res = getSSA();
1275         ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz));
1276         ld->setIndirect(0, 0, ptr);
1277         ld->setIndirect(0, 1, vtx);
1278      } else {
1279         idx += op.indices[0].disp;
1280         res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4));
1281      }
1282      if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
1283         res->defs->getInsn()->perPatch = 1;
1284      break;
1285   case SM4_FILE_CONSTANT_BUFFER:
1286      assert(op.num_indices == 2);
1287      assert(op.is_index_simple(0));
1288
1289      ptr = getSrcPtr(s, 1, 4);
1290      dim = op.indices[0].disp;
1291      off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
1292
1293      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr);
1294      break;
1295   case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER:
1296      ptr = getSrcPtr(s, 0, 4);
1297      off = (op.indices[0].disp * 4 + swz) * 4;
1298      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr);
1299      break;
1300   case SM4_FILE_INDEXABLE_TEMP:
1301   {
1302      assert(op.is_index_simple(0));
1303      int a = op.indices[0].disp;
1304      idx = op.indices[1].disp;
1305      res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4));
1306   }
1307      break;
1308   case SM4_FILE_INPUT_PRIMITIVEID:
1309      recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
1310      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
1311      break;
1312   case SM4_FILE_INPUT_GS_INSTANCE_ID:
1313   case SM4_FILE_OUTPUT_CONTROL_POINT_ID:
1314      recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true);
1315      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0));
1316      break;
1317   case SM4_FILE_CYCLE_COUNTER:
1318      res =
1319         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0));
1320      break;
1321   case SM4_FILE_INPUT_FORK_INSTANCE_ID:
1322   case SM4_FILE_INPUT_JOIN_INSTANCE_ID:
1323   {
1324      phaseInstanceUsed = true;
1325      if (unrollPhase)
1326         return loadImm(NULL, phaseInstance);
1327      const unsigned int cnt = phaseInstCnt[phase - 1][subPhase];
1328      res = getScratch();
1329      res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0));
1330      res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1));
1331   }
1332      break;
1333   case SM4_FILE_INPUT_DOMAIN_POINT:
1334      assert(swz < 3);
1335      res = domainPt[swz];
1336      break;
1337   case SM4_FILE_THREAD_GROUP_SHARED_MEMORY:
1338      off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
1339      ptr = getSrcPtr(s, 0, 4);
1340      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr);
1341      break;
1342   case SM4_FILE_RESOURCE:
1343   case SM4_FILE_SAMPLER:
1344   case SM4_FILE_UNORDERED_ACCESS_VIEW:
1345      return NULL;
1346   case SM4_FILE_INPUT_THREAD_ID:
1347      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz));
1348      break;
1349   case SM4_FILE_INPUT_THREAD_GROUP_ID:
1350      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz));
1351      break;
1352   case SM4_FILE_FUNCTION_INPUT:
1353   case SM4_FILE_INPUT_THREAD_ID_IN_GROUP:
1354      assert(!"unhandled source file");
1355      return NULL;
1356   default:
1357      assert(!"invalid source file");
1358      return NULL;
1359   }
1360
1361   if (op.abs)
1362      res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res);
1363   if (op.neg)
1364      res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res);
1365   return res;
1366}
1367
1368Value *
1369Converter::dst(const sm4_op &op, int c, int i)
1370{
1371   switch (op.file) {
1372   case SM4_FILE_TEMP:
1373      return tData32.acquire(op.indices[0].disp, c);
1374   case SM4_FILE_INDEXABLE_TEMP:
1375      return getScratch();
1376   case SM4_FILE_OUTPUT:
1377      if (prog->getType() == Program::TYPE_FRAGMENT)
1378         return oData.acquire(op.indices[0].disp, c);
1379      return getScratch();
1380   case SM4_FILE_NULL:
1381      return NULL;
1382   case SM4_FILE_OUTPUT_DEPTH:
1383   case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
1384   case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
1385   case SM4_FILE_OUTPUT_COVERAGE_MASK:
1386      return getScratch();
1387   case SM4_FILE_IMMEDIATE32:
1388   case SM4_FILE_IMMEDIATE64:
1389   case SM4_FILE_CONSTANT_BUFFER:
1390   case SM4_FILE_RESOURCE:
1391   case SM4_FILE_SAMPLER:
1392   case SM4_FILE_UNORDERED_ACCESS_VIEW:
1393      assert(!"invalid destination file");
1394      return NULL;
1395   default:
1396      assert(!"invalid file");
1397      return NULL;
1398   }
1399}
1400
1401void
1402Converter::saveFragDepth(operation op, Value *value)
1403{
1404   if (op == OP_MIN || op == OP_MAX) {
1405      Value *zIn;
1406      zIn = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2));
1407      value = mkOp2v(op, TYPE_F32, getSSA(), value, zIn);
1408   }
1409   oData.store(info.io.fragDepth, 2, NULL, value);
1410}
1411
1412void
1413Converter::saveDst(const sm4_op &op, int c, Value *value, int s)
1414{
1415   Symbol *sym;
1416   Instruction *st;
1417   int a, idx;
1418
1419   switch (op.file) {
1420   case SM4_FILE_TEMP:
1421      idx = op.indices[0].disp;
1422      tData32.store(idx, c, NULL, value);
1423      break;
1424   case SM4_FILE_INDEXABLE_TEMP:
1425      a = op.indices[0].disp;
1426      idx = op.indices[1].disp;
1427      // FIXME: shift is wrong, depends in lData
1428      lData[a].store(idx, c, getDstPtr(s, 1, 4), value);
1429      break;
1430   case SM4_FILE_OUTPUT:
1431      assert(op.num_indices == 1);
1432      idx = op.indices[0].disp;
1433      if (prog->getType() == Program::TYPE_FRAGMENT) {
1434         oData.store(idx, c, NULL, value);
1435      } else {
1436         if (phase)
1437            idx += info.numOutputs - info.numPatchConstants;
1438         const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4;
1439         sym = oSym(idx, c);
1440         if (sym->reg.file == FILE_SHADER_OUTPUT)
1441            st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value);
1442         else
1443            st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value);
1444         st->perPatch = phase ? 1 : 0;
1445      }
1446      break;
1447   case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
1448      saveFragDepth(OP_MAX, value);
1449      break;
1450   case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
1451      saveFragDepth(OP_MIN, value);
1452      break;
1453   case SM4_FILE_OUTPUT_DEPTH:
1454      saveFragDepth(OP_NOP, value);
1455      break;
1456   case SM4_FILE_OUTPUT_COVERAGE_MASK:
1457      oData.store(info.io.sampleMask, 0, NULL, value);
1458      break;
1459   case SM4_FILE_IMMEDIATE32:
1460   case SM4_FILE_IMMEDIATE64:
1461   case SM4_FILE_INPUT:
1462   case SM4_FILE_CONSTANT_BUFFER:
1463   case SM4_FILE_RESOURCE:
1464   case SM4_FILE_SAMPLER:
1465      assert(!"invalid destination file");
1466      return;
1467   default:
1468      assert(!"invalid file");
1469      return;
1470   }
1471}
1472
1473void
1474Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4])
1475{
1476   Value *res[4] = { NULL, NULL, NULL, NULL };
1477   unsigned int c, d;
1478
1479   for (c = 0; c < 4; ++c)
1480      if (dst0[c])
1481         tex->tex.mask |= 1 << swz[c];
1482   for (d = 0, c = 0; c < 4; ++c)
1483      if (tex->tex.mask & (1 << c))
1484         tex->setDef(d++, (res[c] = getScratch()));
1485
1486   bb->insertTail(tex);
1487
1488   if (insn->opcode == SM4_OPCODE_RESINFO) {
1489      if (tex->tex.target.getDim() == 1) {
1490	 res[2] = loadImm(NULL, 0);
1491         if (!tex->tex.target.isArray())
1492            res[1] = res[2];
1493      } else
1494      if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) {
1495         res[2] = loadImm(NULL, 0);
1496      }
1497      for (c = 0; c < 4; ++c) {
1498         if (!dst0[c])
1499            continue;
1500         Value *src = res[swz[c]];
1501         assert(src);
1502         switch (insn->insn.resinfo_return_type) {
1503         case 0:
1504            mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
1505            break;
1506         case 1:
1507            mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
1508            if (swz[c] < tex->tex.target.getDim())
1509               mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]);
1510            break;
1511         default:
1512            mkMov(dst0[c], src);
1513            break;
1514         }
1515      }
1516   } else {
1517      for (c = 0; c < 4; ++c)
1518         if (dst0[c])
1519            mkMov(dst0[c], res[swz[c]]);
1520   }
1521}
1522
1523void
1524Converter::handleQUERY(Value *dst0[4], enum TexQuery query)
1525{
1526   TexInstruction *texi = new_TexInstruction(func, OP_TXQ);
1527   texi->tex.query = query;
1528
1529   assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs
1530
1531   const int rOp = (query == TXQ_DIMS) ? 2 : 1;
1532   const int sOp = (query == TXQ_DIMS) ? 0 : 1;
1533
1534   const int tR = insn->ops[rOp]->indices[0].disp;
1535
1536   texi->setTexture(resourceType[tR][0], tR, 0);
1537
1538   texi->setSrc(0, src(sOp, 0)); // mip level or sample index
1539
1540   emitTex(dst0, texi, insn->ops[rOp]->swizzle);
1541}
1542
1543void
1544Converter::handleLOAD(Value *dst0[4])
1545{
1546   TexInstruction *texi = new_TexInstruction(func, OP_TXF);
1547   unsigned int c;
1548
1549   const int tR = insn->ops[2]->indices[0].disp;
1550
1551   texi->setTexture(resourceType[tR][0], tR, 0);
1552
1553   for (c = 0; c < texi->tex.target.getArgCount(); ++c)
1554      texi->setSrc(c, src(0, c));
1555
1556   if (texi->tex.target == TEX_TARGET_BUFFER) {
1557      texi->tex.levelZero = true;
1558   } else {
1559      texi->setSrc(c++, src(0, 3));
1560      for (c = 0; c < 3; ++c) {
1561         texi->tex.offset[0][c] = insn->sample_offset[c];
1562	 if (texi->tex.offset[0][c])
1563            texi->tex.useOffsets = 1;
1564      }
1565   }
1566
1567   emitTex(dst0, texi, insn->ops[2]->swizzle);
1568}
1569
1570// order of nv50 ir sources: x y z/layer lod/bias dc
1571void
1572Converter::handleSAMPLE(operation opr, Value *dst0[4])
1573{
1574   TexInstruction *texi = new_TexInstruction(func, opr);
1575   unsigned int c, s;
1576   Value *arg[4], *src0[4];
1577   Value *val;
1578   Value *lod = NULL, *dc = NULL;
1579
1580   const int tR = insn->ops[2]->indices[0].disp;
1581   const int tS = insn->ops[3]->indices[0].disp;
1582
1583   TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 1 : 0];
1584
1585   for (c = 0; c < tgt.getArgCount(); ++c)
1586      arg[c] = src0[c] = src(0, c);
1587
1588   if (insn->opcode == SM4_OPCODE_SAMPLE_L ||
1589       insn->opcode == SM4_OPCODE_SAMPLE_B) {
1590      lod = src(3, 0);
1591   } else
1592   if (insn->opcode == SM4_OPCODE_SAMPLE_C ||
1593       insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) {
1594      dc = src(3, 0);
1595      if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ)
1596         texi->tex.levelZero = true;
1597   } else
1598   if (insn->opcode == SM4_OPCODE_SAMPLE_D) {
1599      for (c = 0; c < tgt.getDim(); ++c) {
1600         texi->dPdx[c] = src(3, c);
1601         texi->dPdy[c] = src(4, c);
1602      }
1603   }
1604
1605   if (tgt.isCube()) {
1606      for (c = 0; c < 3; ++c)
1607         src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
1608      val = getScratch();
1609      mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]);
1610      mkOp2(OP_MAX, TYPE_F32, val, src0[2], val);
1611      mkOp1(OP_RCP, TYPE_F32, val, val);
1612      for (c = 0; c < 3; ++c)
1613         src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
1614   }
1615
1616   for (s = 0; s < tgt.getArgCount(); ++s)
1617      texi->setSrc(s, src0[s]);
1618   if (lod)
1619      texi->setSrc(s++, lod);
1620   if (dc)
1621      texi->setSrc(s++, dc);
1622
1623   for (c = 0; c < 3; ++c) {
1624      texi->tex.offset[0][c] = insn->sample_offset[c];
1625      if (texi->tex.offset[0][c])
1626         texi->tex.useOffsets = 1;
1627   }
1628
1629   texi->setTexture(tgt, tR, tS);
1630
1631   emitTex(dst0, texi, insn->ops[2]->swizzle);
1632}
1633
1634void
1635Converter::handleDP(Value *dst0[4], int dim)
1636{
1637   Value *src0 = src(0, 0), *src1 = src(1, 0);
1638   Value *dotp = getScratch();
1639
1640   assert(dim > 0);
1641
1642   mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
1643   for (int c = 1; c < dim; ++c)
1644      mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp);
1645
1646   for (int c = 0; c < 4; ++c)
1647      dst0[c] = dotp;
1648}
1649
1650void
1651Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
1652{
1653   FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
1654   join->fixed = 1;
1655   conv->insertHead(join);
1656
1657   fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
1658   fork->insertBefore(fork->getExit(), fork->joinAt);
1659}
1660
1661void
1662Converter::finalizeShader()
1663{
1664   if (finalized)
1665      return;
1666   BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
1667   entryBBs.pop();
1668
1669   finalized = true;
1670
1671   bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
1672   setPosition(epilogue, true);
1673
1674   if (prog->getType() == Program::TYPE_FRAGMENT)
1675      exportOutputs();
1676
1677   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
1678}
1679
// Loop headers that run the following statement once for every component
// `chan` that is set in the write mask of the current instruction's first
// operand. `chan` must be an already declared integer variable.
// They expand to `for (...) if (...)`, so the statement that follows is the
// body of the `if`; a trailing `else` would bind to it.
#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, count)    \
   for ((chan) = 0; (chan) < (count); ++(chan))       \
      if (insn->ops[0].get()->mask & (1 << (chan)))

// 32 bit destinations: components 0..3
#define FOR_EACH_DST0_ENABLED_CHANNEL32(chan)         \
   FOR_EACH_DST0_ENABLED_CHANNEL(chan, 4)

// 64 bit destinations: components 0..1
#define FOR_EACH_DST0_ENABLED_CHANNEL64(chan)         \
   FOR_EACH_DST0_ENABLED_CHANNEL(chan, 2)
1687
1688bool
1689Converter::checkDstSrcAliasing() const
1690{
1691   for (unsigned int d = 0; d < nDstOpnds; ++d) {
1692      for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) {
1693         if (insn->ops[d]->file != insn->ops[s]->file)
1694            continue;
1695         int i = insn->ops[s]->num_indices - 1;
1696         if (i != insn->ops[d]->num_indices - 1)
1697            continue;
1698         if (insn->ops[d]->is_index_simple(i) &&
1699             insn->ops[s]->is_index_simple(i) &&
1700             insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp)
1701            return true;
1702      }
1703   }
1704   return false;
1705}
1706
1707bool
1708Converter::handleInstruction(unsigned int pos)
1709{
1710   Value *dst0[4], *rDst0[4];
1711   Value *dst1[4], *rDst1[4];
1712   int c, nc;
1713
1714   insn = sm4.insns[pos];
1715   enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode);
1716
1717   operation op = cvtOpcode(opcode);
1718
1719   sTy = inferSrcType(opcode);
1720   dTy = inferDstType(opcode);
1721
1722   nc = dTy == TYPE_F64 ? 2 : 4;
1723
1724   nDstOpnds = getDstOpndCount(opcode);
1725
1726   bool useScratchDst = checkDstSrcAliasing();
1727
1728   INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst);
1729
1730   if (nDstOpnds >= 1) {
1731      for (c = 0; c < nc; ++c)
1732         rDst0[c] = dst0[c] =
1733            insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL;
1734      if (useScratchDst)
1735         for (c = 0; c < nc; ++c)
1736            dst0[c] = rDst0[c] ? getScratch() : NULL;
1737   }
1738
1739   if (nDstOpnds >= 2) {
1740      for (c = 0; c < nc; ++c)
1741         rDst1[c] = dst1[c] =
1742            insn->ops[1].get()->mask & (1 << c) ? dst(1, c) : NULL;
1743      if (useScratchDst)
1744         for (c = 0; c < nc; ++c)
1745            dst1[c] = rDst1[c] ? getScratch() : NULL;
1746   }
1747
1748   switch (insn->opcode) {
1749   case SM4_OPCODE_ADD:
1750   case SM4_OPCODE_AND:
1751   case SM4_OPCODE_DIV:
1752   case SM4_OPCODE_IADD:
1753   case SM4_OPCODE_IMAX:
1754   case SM4_OPCODE_IMIN:
1755   case SM4_OPCODE_MIN:
1756   case SM4_OPCODE_MAX:
1757   case SM4_OPCODE_MUL:
1758   case SM4_OPCODE_OR:
1759   case SM4_OPCODE_UMAX:
1760   case SM4_OPCODE_UMIN:
1761   case SM4_OPCODE_XOR:
1762      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1763         Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1764         if (dTy == TYPE_F32)
1765            insn->ftz = 1;
1766      }
1767      break;
1768
1769   case SM4_OPCODE_ISHL:
1770   case SM4_OPCODE_ISHR:
1771   case SM4_OPCODE_USHR:
1772      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1773         Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1774         insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP;
1775      }
1776      break;
1777
1778   case SM4_OPCODE_IMAD:
1779   case SM4_OPCODE_MAD:
1780   case SM4_OPCODE_UMAD:
1781      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1782         mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c));
1783      }
1784      break;
1785
1786   case SM4_OPCODE_DADD:
1787   case SM4_OPCODE_DMAX:
1788   case SM4_OPCODE_DMIN:
1789   case SM4_OPCODE_DMUL:
1790      FOR_EACH_DST0_ENABLED_CHANNEL64(c) {
1791         mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
1792      }
1793      break;
1794
1795   case SM4_OPCODE_UDIV:
1796      for (c = 0; c < 4; ++c) {
1797         Value *dvn, *dvs;
1798         if (dst0[c] || dst1[c]) {
1799            dvn = src(0, c);
1800            dvs = src(1, c);
1801         }
1802         if (dst0[c])
1803            mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs);
1804         if (dst1[c])
1805            mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs);
1806      }
1807      break;
1808
1809   case SM4_OPCODE_IMUL:
1810   case SM4_OPCODE_UMUL:
1811      for (c = 0; c < 4; ++c) {
1812         Value *a, *b;
1813         if (dst0[c] || dst1[c]) {
1814            a = src(0, c);
1815            b = src(1, c);
1816         }
1817         if (dst0[c])
1818            mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp =
1819               NV50_IR_SUBOP_MUL_HIGH;
1820         if (dst1[c])
1821            mkOp2(OP_MUL, dTy, dst1[c], a, b);
1822      }
1823      break;
1824
1825   case SM4_OPCODE_DP2:
1826      handleDP(dst0, 2);
1827      break;
1828   case SM4_OPCODE_DP3:
1829      handleDP(dst0, 3);
1830      break;
1831   case SM4_OPCODE_DP4:
1832      handleDP(dst0, 4);
1833      break;
1834
1835   case SM4_OPCODE_DERIV_RTX:
1836   case SM4_OPCODE_DERIV_RTX_COARSE:
1837   case SM4_OPCODE_DERIV_RTX_FINE:
1838   case SM4_OPCODE_DERIV_RTY:
1839   case SM4_OPCODE_DERIV_RTY_COARSE:
1840   case SM4_OPCODE_DERIV_RTY_FINE:
1841   case SM4_OPCODE_MOV:
1842   case SM4_OPCODE_INEG:
1843   case SM4_OPCODE_NOT:
1844   case SM4_OPCODE_SQRT:
1845   case SM4_OPCODE_COUNTBITS:
1846   case SM4_OPCODE_EXP:
1847   case SM4_OPCODE_LOG:
1848   case SM4_OPCODE_RCP:
1849      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1850         mkOp1(op, dTy, dst0[c], src(0, c));
1851      }
1852      break;
1853
1854   case SM4_OPCODE_FRC:
1855      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1856         Value *val = getScratch();
1857         Value *src0 = src(0, c);
1858         mkOp1(OP_FLOOR, TYPE_F32, val, src0);
1859         mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val);
1860      }
1861      break;
1862
1863   case SM4_OPCODE_MOVC:
1864      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1865         mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c),
1866               src(0, c));
1867      break;
1868
1869   case SM4_OPCODE_ROUND_NE:
1870   case SM4_OPCODE_ROUND_NI:
1871   case SM4_OPCODE_ROUND_PI:
1872   case SM4_OPCODE_ROUND_Z:
1873      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1874         Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c));
1875         rnd->ftz = 1;
1876         rnd->rnd = cvtRoundingMode(opcode);
1877      }
1878      break;
1879
1880   case SM4_OPCODE_RSQ:
1881      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1882         mkOp1(op, dTy, dst0[c], src(0, c));
1883      break;
1884
1885   case SM4_OPCODE_SINCOS:
1886      for (c = 0; c < 4; ++c) {
1887         if (!dst0[c] && !dst1[c])
1888            continue;
1889         Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c));
1890         if (dst0[c])
1891            mkOp1(OP_SIN, TYPE_F32, dst0[c], val);
1892         if (dst1[c])
1893            mkOp1(OP_COS, TYPE_F32, dst1[c], val);
1894      }
1895      break;
1896
1897   case SM4_OPCODE_EQ:
1898   case SM4_OPCODE_GE:
1899   case SM4_OPCODE_IEQ:
1900   case SM4_OPCODE_IGE:
1901   case SM4_OPCODE_ILT:
1902   case SM4_OPCODE_LT:
1903   case SM4_OPCODE_NE:
1904   case SM4_OPCODE_INE:
1905   case SM4_OPCODE_ULT:
1906   case SM4_OPCODE_UGE:
1907   case SM4_OPCODE_DEQ:
1908   case SM4_OPCODE_DGE:
1909   case SM4_OPCODE_DLT:
1910   case SM4_OPCODE_DNE:
1911   {
1912      CondCode cc = cvtCondCode(opcode);
1913      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
1914         CmpInstruction *set;
1915         set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), NULL);
1916         set->setType(dTy, sTy);
1917         if (sTy == TYPE_F32)
1918            set->ftz = 1;
1919      }
1920   }
1921      break;
1922
1923   case SM4_OPCODE_FTOI:
1924   case SM4_OPCODE_FTOU:
1925      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1926         mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z;
1927      break;
1928   case SM4_OPCODE_ITOF:
1929   case SM4_OPCODE_UTOF:
1930   case SM4_OPCODE_F32TOF16:
1931   case SM4_OPCODE_F16TOF32:
1932   case SM4_OPCODE_DTOF:
1933   case SM4_OPCODE_FTOD:
1934      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
1935         mkCvt(op, dTy, dst0[c], sTy, src(0, c));
1936      break;
1937
1938   case SM4_OPCODE_CUT:
1939   case SM4_OPCODE_CUT_STREAM:
1940      mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1;
1941      break;
1942   case SM4_OPCODE_EMIT:
1943   case SM4_OPCODE_EMIT_STREAM:
1944      mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1;
1945      break;
1946   case SM4_OPCODE_EMITTHENCUT:
1947   case SM4_OPCODE_EMITTHENCUT_STREAM:
1948   {
1949      Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL,  mkImm(0));
1950      cut->fixed = 1;
1951      cut->subOp = NV50_IR_SUBOP_EMIT_RESTART;
1952   }
1953      break;
1954
1955   case SM4_OPCODE_DISCARD:
1956      info.prop.fp.usesDiscard = TRUE;
1957      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(
1958         insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
1959      break;
1960
1961   case SM4_OPCODE_CALL:
1962   case SM4_OPCODE_CALLC:
1963      assert(!"CALL/CALLC not implemented");
1964      break;
1965
1966   case SM4_OPCODE_RET:
1967      // XXX: the following doesn't work with subroutines / early ret
1968      if (!haveNextPhase(pos))
1969         finalizeShader();
1970      else
1971         phaseEnded = phase + 1;
1972      break;
1973
1974   case SM4_OPCODE_IF:
1975   {
1976      BasicBlock *ifClause = new BasicBlock(func);
1977
1978      bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE);
1979      condBBs.push(bb);
1980      joinBBs.push(bb);
1981
1982      mkFlow(OP_BRA, NULL, insn->insn.test_nz ? CC_NOT_P : CC_P, src(0, 0));
1983
1984      setPosition(ifClause, true);
1985   }
1986      break;
1987   case SM4_OPCODE_ELSE:
1988   {
1989      BasicBlock *elseClause = new BasicBlock(func);
1990      BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
1991
1992      forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE);
1993      condBBs.push(bb);
1994
1995      forkPoint->getExit()->asFlow()->target.bb = elseClause;
1996      if (!bb->isTerminated())
1997         mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
1998
1999      setPosition(elseClause, true);
2000   }
2001      break;
2002   case SM4_OPCODE_ENDIF:
2003   {
2004      BasicBlock *convPoint = new BasicBlock(func);
2005      BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
2006      BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
2007
2008      if (!bb->isTerminated()) {
2009         // we only want join if none of the clauses ended with CONT/BREAK/RET
2010         if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
2011            insertConvergenceOps(convPoint, forkPoint);
2012         mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL);
2013         bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
2014      }
2015
2016      if (lastBB->getExit()->op == OP_BRA) {
2017         lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
2018         lastBB->getExit()->asFlow()->target.bb = convPoint;
2019      }
2020      setPosition(convPoint, true);
2021   }
2022      break;
2023
2024   case SM4_OPCODE_SWITCH:
2025   case SM4_OPCODE_CASE:
2026   case SM4_OPCODE_ENDSWITCH:
2027      assert(!"SWITCH/CASE/ENDSWITCH not implemented");
2028      break;
2029
2030   case SM4_OPCODE_LOOP:
2031   {
2032      BasicBlock *loopHeader = new BasicBlock(func);
2033      BasicBlock *loopBreak = new BasicBlock(func);
2034
2035      loopBBs.push(loopHeader);
2036      breakBBs.push(loopBreak);
2037      if (loopBBs.getSize() > func->loopNestingBound)
2038         func->loopNestingBound++;
2039
2040      mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL);
2041
2042      bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE);
2043      setPosition(loopHeader, true);
2044      mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL);
2045   }
2046      break;
2047   case SM4_OPCODE_ENDLOOP:
2048   {
2049      BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
2050
2051      if (!bb->isTerminated()) {
2052         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
2053         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
2054      }
2055      setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
2056   }
2057      break;
2058   case SM4_OPCODE_BREAK:
2059   {
2060      if (bb->isTerminated())
2061         break;
2062      BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2063      mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL);
2064      bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
2065   }
2066      break;
2067   case SM4_OPCODE_BREAKC:
2068   {
2069      BasicBlock *nextBB = new BasicBlock(func);
2070      BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
2071      CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P;
2072      mkFlow(OP_BREAK, breakBB, cc, src(0, 0));
2073      bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
2074      bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
2075      setPosition(nextBB, true);
2076   }
2077      break;
2078   case SM4_OPCODE_CONTINUE:
2079   {
2080      if (bb->isTerminated())
2081         break;
2082      BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2083      mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
2084      contBB->explicitCont = true;
2085      bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2086   }
2087      break;
2088   case SM4_OPCODE_CONTINUEC:
2089   {
2090      BasicBlock *nextBB = new BasicBlock(func);
2091      BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
2092      mkFlow(OP_CONT, contBB, insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
2093      bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2094      bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
2095      setPosition(nextBB, true);
2096   }
2097      break;
2098
2099   case SM4_OPCODE_SAMPLE:
2100   case SM4_OPCODE_SAMPLE_C:
2101   case SM4_OPCODE_SAMPLE_C_LZ:
2102   case SM4_OPCODE_SAMPLE_L:
2103   case SM4_OPCODE_SAMPLE_D:
2104   case SM4_OPCODE_SAMPLE_B:
2105      handleSAMPLE(op, dst0);
2106      break;
2107   case SM4_OPCODE_LD:
2108   case SM4_OPCODE_LD_MS:
2109      handleLOAD(dst0);
2110      break;
2111
2112   case SM4_OPCODE_GATHER4:
2113      assert(!"GATHER4 not implemented\n");
2114      break;
2115
2116   case SM4_OPCODE_RESINFO:
2117      handleQUERY(dst0, TXQ_DIMS);
2118      break;
2119   case SM4_OPCODE_SAMPLE_POS:
2120      handleQUERY(dst0, TXQ_SAMPLE_POSITION);
2121      break;
2122
2123   case SM4_OPCODE_NOP:
2124      mkOp(OP_NOP, TYPE_NONE, NULL);
2125      break;
2126
2127   case SM4_OPCODE_HS_DECLS:
2128      // XXX: any significance ?
2129      break;
2130   case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
2131      phase = 0;
2132      break;
2133   case SM4_OPCODE_HS_FORK_PHASE:
2134      if (phase != 1)
2135         subPhase = 0;
2136      phase = 1;
2137      phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
2138      phaseStart = pos;
2139      if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase])
2140         unrollPhase = true;
2141      break;
2142   case SM4_OPCODE_HS_JOIN_PHASE:
2143      if (phase != 2)
2144         subPhase = 0;
2145      phase = 2;
2146      phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
2147      phaseStart = pos;
2148      if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase])
2149         unrollPhase = true;
2150      break;
2151
2152   default:
2153      ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode);
2154      abort();
2155      return false;
2156   }
2157
2158   for (c = 0; c < nc; ++c) {
2159      if (nDstOpnds >= 1 && rDst0[c]) {
2160         if (dst0[c] != rDst0[c])
2161            mkMov(rDst0[c], dst0[c]);
2162         saveDst(0, c, rDst0[c]);
2163      }
2164      if (nDstOpnds >= 2 && rDst1[c]) {
2165         if (dst1[c] != rDst1[c])
2166            mkMov(rDst1[c], dst1[c]);
2167         saveDst(1, c, rDst1[c]);
2168      }
2169   }
2170
2171   memset(srcPtr, 0, sizeof(srcPtr));
2172   memset(dstPtr, 0, sizeof(dstPtr));
2173   memset(vtxBase, 0, sizeof(vtxBase));
2174   return true;
2175}
2176
2177void
2178Converter::exportOutputs()
2179{
2180   for (int i = 0; i < info.numOutputs; ++i) {
2181      for (int c = 0; c < 4; ++c) {
2182         if (!oData.exists(i, c))
2183            continue;
2184         Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
2185                                info.out[i].slot[c] * 4);
2186         Value *val = oData.load(i, c, NULL);
2187         if (val)
2188            mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
2189      }
2190   }
2191}
2192
2193Converter::Converter(Program *p, struct nv50_ir_prog_info *s)
2194   : tData32(this),
2195     tData64(this),
2196     oData(this),
2197     info(*s),
2198     sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)),
2199     prog(p)
2200{
2201   memset(srcPtr, 0, sizeof(srcPtr));
2202   memset(dstPtr, 0, sizeof(dstPtr));
2203   memset(vtxBase, 0, sizeof(vtxBase));
2204
2205   memset(interpMode, 0, sizeof(interpMode));
2206
2207   nrRegVals = nrArrays = arrayVol = 0;
2208
2209   for (phase = 3; phase > 0; --phase)
2210      for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
2211         out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT;
2212
2213   unrollPhase = false;
2214   phaseStart = 0;
2215   subPhaseCnt[0] = subPhaseCnt[1] = 0;
2216}
2217
2218Converter::~Converter()
2219{
2220   if (lData)
2221      delete[] lData;
2222
2223   if (subPhaseCnt[0])
2224      delete[] phaseInstCnt[0];
2225   if (subPhaseCnt[1])
2226      delete[] phaseInstCnt[1];
2227}
2228
2229bool
2230Converter::haveNextPhase(unsigned int pos) const
2231{
2232   ++pos;
2233   return (pos < sm4.insns.size()) &&
2234      (sm4.insns[pos]->opcode == SM4_OPCODE_HS_FORK_PHASE ||
2235       sm4.insns[pos]->opcode == SM4_OPCODE_HS_JOIN_PHASE);
2236}
2237
2238bool
2239Converter::run()
2240{
2241   parseSignature();
2242
2243   for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
2244      inspectDeclaration(*sm4.dcls[pos]);
2245
2246   phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]];
2247   phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]];
2248   for (int i = 0; i < subPhaseCnt[0]; ++i)
2249      phaseInstCnt[0][i] = -1;
2250   for (int i = 0; i < subPhaseCnt[1]; ++i)
2251      phaseInstCnt[1][i] = -1;
2252   // re-increased in handleDeclaration:
2253   subPhaseCnt[0] = subPhaseCnt[1] = 0;
2254
2255   allocateValues();
2256   nrArrays = 0;
2257   for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
2258      handleDeclaration(*sm4.dcls[pos]);
2259
2260   info.io.genUserClip = -1; // no UCPs permitted with SM4 shaders
2261   info.io.clipDistanceMask = (1 << info.io.clipDistanceMask) - 1;
2262
2263   info.assignSlots(&info);
2264
2265   if (sm4.dcls.size() == 0 && sm4.insns.size() == 0)
2266      return true;
2267
2268   BasicBlock *entry = new BasicBlock(prog->main);
2269   BasicBlock *leave = new BasicBlock(prog->main);
2270
2271   prog->main->setEntry(entry);
2272   prog->main->setExit(leave);
2273
2274   setPosition(entry, true);
2275
2276   entryBBs.push(entry);
2277   leaveBBs.push(leave);
2278
2279   if (prog->getType() == Program::TYPE_FRAGMENT) {
2280      Symbol *sv = mkSysVal(SV_POSITION, 3);
2281      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
2282      mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
2283   } else
2284   if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) {
2285      const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 3 : 2;
2286      int c;
2287      for (c = 0; c < n; ++c)
2288         domainPt[c] =
2289            mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c));
2290      if (c == 2)
2291         domainPt[2] = loadImm(NULL, 0.0f);
2292   }
2293
2294   finalized = false;
2295   phaseEnded = 0;
2296   phase = 0;
2297   subPhase = 0;
2298   for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) {
2299      handleInstruction(pos);
2300      if (likely(phase == 0) || (phaseEnded < 2))
2301         continue;
2302      phaseEnded = 0;
2303      if (!unrollPhase || !phaseInstanceUsed) {
2304         ++subPhase;
2305         continue;
2306      }
2307      phaseInstanceUsed = false;
2308      if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1))
2309         pos = phaseStart - 1;
2310      else
2311         ++subPhase;
2312   }
2313   finalizeShader();
2314
2315   return true;
2316}
2317
2318} // anonymous namespace
2319
2320namespace nv50_ir {
2321
2322bool
2323Program::makeFromSM4(struct nv50_ir_prog_info *info)
2324{
2325   Converter bld(this, info);
2326   return bld.run();
2327}
2328
2329} // namespace nv50_ir
2330