sm4_to_tgsi.cpp revision 6c598c78bd17642d731cf57b8369cc794f64ba2f
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27#include "sm4.h"
28#include "tgsi/tgsi_ureg.h"
29#include <vector>
30
/*
 * Internal validation helpers used throughout the translator.
 *
 * check(x) verifies a condition that must hold for the shader being
 * translated; fail(msg) reports an unconditional failure.
 *
 * Two implementations exist. The disabled one throws a C string (the
 * stringified condition, or the message) and is what the try/catch in
 * sm4_to_tgsi_converter::translate() is written for. The enabled one
 * uses assert(), so it compiles to nothing when NDEBUG is defined.
 */
#if 0
#define check(x) do {if(!(x)) throw(#x);} while(0)
#define fail(x) throw(x)
#else
#define check(x) assert(x)
#define fail(x) assert(0 && (x))
#endif
38
39static unsigned sm4_to_pipe_interpolation[] =
40{
41	TGSI_INTERPOLATE_PERSPECTIVE, /* UNDEFINED */
42	TGSI_INTERPOLATE_CONSTANT,
43	TGSI_INTERPOLATE_PERSPECTIVE, /* LINEAR */
44	TGSI_INTERPOLATE_PERSPECTIVE, /* LINEAR_CENTROID */
45	TGSI_INTERPOLATE_LINEAR, /* LINEAR_NOPERSPECTIVE */
46	TGSI_INTERPOLATE_LINEAR, /* LINEAR_NOPERSPECTIVE_CENTROID */
47
48	// Added in D3D10.1
49	TGSI_INTERPOLATE_PERSPECTIVE, /* LINEAR_SAMPLE */
50	TGSI_INTERPOLATE_LINEAR, /* LINEAR_NOPERSPECTIVE_SAMPLE */
51};
52
53static int sm4_to_pipe_sv[] =
54{
55	-1,
56	TGSI_SEMANTIC_POSITION,
57	-1, /*TGSI_SEMANTIC_CLIP_DISTANCE */
58	-1, /*TGSI_SEMANTIC_CULL_DISTANCE */
59	-1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */
60	-1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */
61	-1, /*TGSI_SEMANTIC_VERTEXID,*/
62	TGSI_SEMANTIC_PRIMID,
63	TGSI_SEMANTIC_INSTANCEID,
64	TGSI_SEMANTIC_FACE,
65	-1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/
66};
67
68struct sm4_to_tgsi_converter
69{
70	struct ureg_program* ureg;
71	std::vector<struct ureg_dst> temps;
72	std::vector<struct ureg_dst> outputs;
73	std::vector<struct ureg_src> inputs;
74	std::vector<struct ureg_src> samplers;
75	std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison
76	std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison
77	std::vector<std::pair<unsigned, unsigned> > loops;
78	sm4_insn* insn;
79	struct sm4_program& program;
80	std::vector<unsigned> sm4_to_tgsi_insn_num;
81	std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num;
82	bool in_sub;
83	bool avoid_txf;
84	bool avoid_int;
85
86	sm4_to_tgsi_converter(struct sm4_program& program)
87	: program(program)
88	{
89		avoid_txf = true;
90		avoid_int = false;
91	}
92
93	struct ureg_dst _reg(sm4_op& op)
94	{
95		switch(op.file)
96		{
97		case SM4_FILE_NULL:
98		{
99			struct ureg_dst d;
100			memset(&d, 0, sizeof(d));
101			d.File = TGSI_FILE_NULL;
102			return d;
103		}
104		case SM4_FILE_TEMP:
105			check(op.has_simple_index());
106			check(op.indices[0].disp < temps.size());
107			return temps[op.indices[0].disp];
108		case SM4_FILE_OUTPUT:
109			check(op.has_simple_index());
110			check(op.indices[0].disp < outputs.size());
111			return outputs[op.indices[0].disp];
112		default:
113			check(0);
114			return ureg_dst_undef();
115		}
116	}
117
118	struct ureg_dst _dst(unsigned i = 0)
119	{
120		check(i < insn->num_ops);
121		sm4_op& op = *insn->ops[i];
122		check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR);
123		struct ureg_dst d = ureg_writemask(_reg(op), op.mask);
124		if(insn->insn.sat)
125			d = ureg_saturate(d);
126		return d;
127	}
128
129	struct ureg_src _src(unsigned i)
130	{
131		check(i < insn->num_ops);
132		sm4_op& op = *insn->ops[i];
133		struct ureg_src s;
134		switch(op.file)
135		{
136		case SM4_FILE_IMMEDIATE32:
137			s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32);
138			break;
139		case SM4_FILE_INPUT:
140			check(op.has_simple_index());
141			check(op.indices[0].disp < inputs.size());
142			s = inputs[op.indices[0].disp];
143			break;
144		case SM4_FILE_CONSTANT_BUFFER:
145			// TODO: indirect addressing
146			check(op.num_indices == 2);
147			check(op.is_index_simple(0));
148			check(op.is_index_simple(1));
149			s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp);
150			s.Dimension = 1;
151			s.DimensionIndex = op.indices[0].disp;
152			break;
153		default:
154			s = ureg_src(_reg(op));
155			break;
156		}
157		if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR)
158			s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]);
159		else
160		{
161			/* immediates are masked to show needed values */
162			check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64);
163		}
164		if(op.abs)
165			s = ureg_abs(s);
166		if(op.neg)
167			s = ureg_negate(s);
168		return s;
169	};
170
171	int _idx(sm4_file file, unsigned i = 0)
172	{
173		check(i < insn->num_ops);
174		sm4_op& op = *insn->ops[i];
175		check(op.file == file);
176		check(op.has_simple_index());
177		return (int)op.indices[0].disp;
178	}
179
180	int _texslot(bool have_sampler = true)
181	{
182		std::map<std::pair<int, int>, int>::iterator i;
183		i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? _idx(SM4_FILE_SAMPLER, 3) : -1));
184		check(i != program.resource_sampler_to_slot.end());
185		return i->second;
186	}
187
188	unsigned tex_target(unsigned texslot)
189	{
190		unsigned mode = sampler_modes[program.slot_to_sampler[texslot]];
191		unsigned target;
192		if(mode)
193			target = targets[program.slot_to_resource[texslot]].second;
194		else
195			target = targets[program.slot_to_resource[texslot]].first;
196		check(target);
197		return target;
198	}
199
200	std::vector<struct ureg_dst> insn_tmps;
201
202	struct ureg_dst _tmp()
203	{
204		struct ureg_dst t = ureg_DECL_temporary(ureg);
205		insn_tmps.push_back(t);
206		return t;
207	}
208
209	struct ureg_dst _tmp(struct ureg_dst d)
210	{
211		if(d.File == TGSI_FILE_TEMPORARY)
212			return d;
213		else
214			return ureg_writemask(_tmp(), d.WriteMask);
215	}
216
217#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break
218#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break
219#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break
220#define OP1(n) OP1_(n, n)
221#define OP2(n) OP2_(n, n)
222#define OP3(n) OP3_(n, n)
223#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break;
224
225	void translate_insns(unsigned begin, unsigned end)
226	{
227		for(unsigned insn_num = begin; insn_num < end; ++insn_num)
228		{
229			sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg);
230			unsigned label;
231			insn = program.insns[insn_num];
232			bool ok;
233			ok = true;
234			switch(insn->opcode)
235			{
236			// trivial instructions
237			case SM4_OPCODE_NOP:
238				break;
239			OP1(MOV);
240
241			// float
242			OP2(ADD);
243			OP2(MUL);
244			OP3(MAD);
245			OP2(DIV);
246			OP1(FRC);
247			OP1(RCP);
248			OP2(MIN);
249			OP2(MAX);
250			OP2_(LT, SLT);
251			OP2_(GE, SGE);
252			OP2_(EQ, SEQ);
253			OP2_(NE, SNE);
254
255			// bitwise
256			OP1(NOT);
257			OP2(AND);
258			OP2(OR);
259			OP2(XOR);
260
261			// special mathematical
262			OP2(DP2);
263			OP2(DP3);
264			OP2(DP4);
265			OP1(RSQ);
266			OP1_(LOG, LG2);
267			OP1_(EXP, EX2);
268
269			// rounding
270			OP1_(ROUND_NE, ROUND);
271			OP1_(ROUND_Z, TRUNC);
272			OP1_(ROUND_PI, CEIL);
273			OP1_(ROUND_NI, FLR);
274
275			// cross-thread
276			OP1_(DERIV_RTX, DDX);
277			OP1_(DERIV_RTX_COARSE, DDX);
278			OP1_(DERIV_RTX_FINE, DDX);
279			OP1_(DERIV_RTY, DDY);
280			OP1_(DERIV_RTY_COARSE, DDY);
281			OP1_(DERIV_RTY_FINE, DDY);
282			case SM4_OPCODE_EMIT:
283				ureg_EMIT(ureg);
284				break;
285			case SM4_OPCODE_CUT:
286				ureg_ENDPRIM(ureg);
287				break;
288			case SM4_OPCODE_EMITTHENCUT:
289				ureg_EMIT(ureg);
290				ureg_ENDPRIM(ureg);
291				break;
292
293			// non-trivial instructions
294			case SM4_OPCODE_MOVC:
295				/* CMP checks for < 0, but MOVC checks for != 0
296				 * but fortunately, x != 0 is equivalent to -abs(x) < 0
297				 * XXX: can test_nz apply to this?!
298				 */
299				ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3));
300				break;
301			case SM4_OPCODE_SQRT:
302			{
303				struct ureg_dst d = _dst();
304				struct ureg_dst t = _tmp(d);
305				ureg_RSQ(ureg, t, _src(1));
306				ureg_RCP(ureg, d, ureg_src(t));
307				break;
308			}
309			case SM4_OPCODE_SINCOS:
310			{
311				struct ureg_dst s = _dst(0);
312				struct ureg_dst c = _dst(1);
313				struct ureg_src v = _src(2);
314				if(s.File != TGSI_FILE_NULL)
315					ureg_SIN(ureg, s, v);
316				if(c.File != TGSI_FILE_NULL)
317					ureg_COS(ureg, c, v);
318				break;
319			}
320
321			// control flow
322			case SM4_OPCODE_DISCARD:
323				ureg_KIL(ureg, _src(0));
324				break;
325			OP_CF(LOOP, BGNLOOP);
326			OP_CF(ENDLOOP, ENDLOOP);
327			case SM4_OPCODE_BREAK:
328				ureg_BRK(ureg);
329				break;
330			case SM4_OPCODE_BREAKC:
331				// XXX: can test_nz apply to this?!
332				ureg_BREAKC(ureg, _src(0));
333				break;
334			case SM4_OPCODE_CONTINUE:
335				ureg_CONT(ureg);
336				break;
337			case SM4_OPCODE_CONTINUEC:
338				// XXX: can test_nz apply to this?!
339				ureg_IF(ureg, _src(0), &label);
340				ureg_CONT(ureg);
341				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
342				ureg_ENDIF(ureg);
343				break;
344			case SM4_OPCODE_SWITCH:
345				ureg_SWITCH(ureg, _src(0));
346				break;
347			case SM4_OPCODE_CASE:
348				ureg_CASE(ureg, _src(0));
349				break;
350			case SM4_OPCODE_DEFAULT:
351				ureg_DEFAULT(ureg);
352				break;
353			case SM4_OPCODE_ENDSWITCH:
354				ureg_ENDSWITCH(ureg);
355				break;
356			case SM4_OPCODE_CALL:
357				ureg_CAL(ureg, &label);
358				label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)]));
359				break;
360			case SM4_OPCODE_LABEL:
361				if(in_sub)
362					ureg_ENDSUB(ureg);
363				else
364					ureg_END(ureg);
365				ureg_BGNSUB(ureg);
366				in_sub = true;
367				break;
368			case SM4_OPCODE_RET:
369				if(in_sub || insn_num != (program.insns.size() - 1))
370					ureg_RET(ureg);
371				break;
372			case SM4_OPCODE_RETC:
373				ureg_IF(ureg, _src(0), &label);
374				if(insn->insn.test_nz)
375					ureg_RET(ureg);
376				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
377				if(!insn->insn.test_nz)
378				{
379					ureg_ELSE(ureg, &label);
380					ureg_RET(ureg);
381					ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
382				}
383				ureg_ENDIF(ureg);
384				break;
385			OP_CF(ELSE, ELSE);
386			case SM4_OPCODE_ENDIF:
387				ureg_ENDIF(ureg);
388				break;
389			case SM4_OPCODE_IF:
390				if(insn->insn.test_nz)
391				{
392					ureg_IF(ureg, _src(0), &label);
393					label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num]));
394				}
395				else
396				{
397					unsigned linked = program.cf_insn_linked[insn_num];
398					if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF)
399					{
400						ureg_IF(ureg, _src(0), &label);
401						ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
402						ureg_ELSE(ureg, &label);
403						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
404					}
405					else
406					{
407						/* we have to swap the branches in this case (fun!)
408						 * TODO: maybe just emit a SEQ 0?
409						 * */
410						unsigned endif = program.cf_insn_linked[linked];
411
412						ureg_IF(ureg, _src(0), &label);
413						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
414
415						translate_insns(linked + 1, endif);
416
417						sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg);
418						ureg_ELSE(ureg, &label);
419						label_to_sm4_insn_num.push_back(std::make_pair(label, endif));
420
421						translate_insns(insn_num + 1, linked);
422
423						insn_num = endif - 1;
424						goto next;
425					}
426				}
427				break;
428			case SM4_OPCODE_RESINFO:
429			{
430				std::map<int, int>::iterator i;
431				i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2));
432				check(i != program.resource_to_slot.end());
433				unsigned texslot = i->second;
434
435				// no driver actually provides this, unfortunately
436				ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]);
437				break;
438			};
439			// TODO: sample offset, sample index
440			case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch)
441			case SM4_OPCODE_LD_MS:
442			{
443				unsigned texslot = _texslot(false);
444				unsigned dim = 0;
445				switch(targets[texslot].first)
446				{
447				case TGSI_TEXTURE_1D:
448					dim = 1;
449					break;
450				case TGSI_TEXTURE_2D:
451				case TGSI_TEXTURE_RECT:
452					dim = 2;
453					break;
454				case TGSI_TEXTURE_3D:
455					dim = 3;
456					break;
457				default:
458					check(0);
459				}
460				struct ureg_dst tmp = _tmp();
461				if(avoid_txf)
462				{
463					struct ureg_src texcoord;
464					if(!avoid_int)
465					{
466						ureg_I2F(ureg, tmp, _src(1));
467						texcoord = ureg_src(tmp);
468					}
469					else
470						texcoord = _src(1);
471
472					ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]);
473				}
474				else
475					ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]);
476				break;
477			}
478			case SM4_OPCODE_SAMPLE: // dst, coord, res, samp
479			{
480				unsigned texslot = _texslot();
481				ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]);
482				break;
483			}
484			case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x
485			{
486				unsigned texslot = _texslot();
487				struct ureg_dst tmp = _tmp();
488				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
489				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
490				ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
491				break;
492			}
493			case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x
494			{
495				unsigned texslot = _texslot();
496				struct ureg_dst tmp = _tmp();
497				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1));
498				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0));
499				ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
500				break;
501			}
502			case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x
503			{
504				unsigned texslot = _texslot();
505				struct ureg_dst tmp = _tmp();
506				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1));
507				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0));
508				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0));
509				ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
510				break;
511			}
512			case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy
513			{
514				unsigned texslot = _texslot();
515				ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5));
516				break;
517			}
518			case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x
519			{
520				unsigned texslot = _texslot();
521				struct ureg_dst tmp = _tmp();
522				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
523				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
524				ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
525				break;
526			}
527			default:
528				ok = false;
529				break;
530			}
531
532			if(!ok && !avoid_int)
533			{
534				ok = true;
535				switch(insn->opcode)
536				{
537				// integer
538				OP1_(ITOF, I2F);
539				OP1_(FTOI, F2I);
540				OP2_(IADD, UADD);
541				OP1(INEG);
542				OP2_(IMUL, UMUL);
543				OP3_(IMAD, UMAD);
544				OP2_(ISHL, SHL);
545				OP2_(ISHR, ISHR);
546				OP2(IMIN);
547				OP2(IMAX);
548				OP2_(ILT, ISLT);
549				OP2_(IGE, ISGE);
550				OP2_(IEQ, USEQ);
551				OP2_(INE, USNE);
552
553				// unsigned
554				OP1_(UTOF, U2F);
555				OP1_(FTOU, F2U);
556				OP2(UMUL);
557				OP3(UMAD);
558				OP2(UMIN);
559				OP2(UMAX);
560				OP2_(ULT, USLT);
561				OP2_(UGE, USGE);
562				OP2(USHR);
563
564				case SM4_OPCODE_UDIV:
565				{
566					struct ureg_dst q = _dst(0);
567					struct ureg_dst r = _dst(1);
568					struct ureg_src a = _src(2);
569					struct ureg_src b = _src(3);
570					if(q.File != TGSI_FILE_NULL)
571						ureg_UDIV(ureg, q, a, b);
572					if(r.File != TGSI_FILE_NULL)
573						ureg_UMOD(ureg, r, a, b);
574					break;
575				}
576				default:
577					ok = false;
578				}
579			}
580
581			if(!ok && avoid_int)
582			{
583				ok = true;
584				switch(insn->opcode)
585				{
586				case SM4_OPCODE_ITOF:
587				case SM4_OPCODE_UTOF:
588					break;
589				OP1_(FTOI, TRUNC);
590				OP1_(FTOU, FLR);
591				// integer
592				OP2_(IADD, ADD);
593				OP2_(IMUL, MUL);
594				OP3_(IMAD, MAD);
595				OP2_(MIN, MIN);
596				OP2_(MAX, MAX);
597				OP2_(ILT, SLT);
598				OP2_(IGE, SGE);
599				OP2_(IEQ, SEQ);
600				OP2_(INE, SNE);
601
602				// unsigned
603				OP2_(UMUL, MUL);
604				OP3_(UMAD, MAD);
605				OP2_(UMIN, MIN);
606				OP2_(UMAX, MAX);
607				OP2_(ULT, SLT);
608				OP2_(UGE, SGE);
609
610				case SM4_OPCODE_INEG:
611					ureg_MOV(ureg, _dst(), ureg_negate(_src(1)));
612					break;
613				case SM4_OPCODE_ISHL:
614				{
615					struct ureg_dst d = _dst();
616					struct ureg_dst t = _tmp(d);
617					ureg_EX2(ureg, t, _src(2));
618					ureg_MUL(ureg, d, ureg_src(t), _src(1));
619					break;
620				}
621				case SM4_OPCODE_ISHR:
622				case SM4_OPCODE_USHR:
623				{
624					struct ureg_dst d = _dst();
625					struct ureg_dst t = _tmp(d);
626					ureg_EX2(ureg, t, ureg_negate(_src(2)));
627					ureg_MUL(ureg, t, ureg_src(t), _src(1));
628					ureg_FLR(ureg, d, ureg_src(t));
629					break;
630				}
631				case SM4_OPCODE_UDIV:
632				{
633					struct ureg_dst q = _dst(0);
634					struct ureg_dst r = _dst(1);
635					struct ureg_src a = _src(2);
636					struct ureg_src b = _src(3);
637					struct ureg_dst f = _tmp();
638					ureg_DIV(ureg, f, a, b);
639					if(q.File != TGSI_FILE_NULL)
640						ureg_FLR(ureg, q, ureg_src(f));
641					if(r.File != TGSI_FILE_NULL)
642					{
643						ureg_FRC(ureg, f, ureg_src(f));
644						ureg_MUL(ureg, r, ureg_src(f), b);
645					}
646					break;
647				}
648				default:
649					ok = false;
650				}
651			}
652
653			check(ok);
654
655			if(!insn_tmps.empty())
656			{
657				for(unsigned i = 0; i < insn_tmps.size(); ++i)
658					ureg_release_temporary(ureg, insn_tmps[i]);
659				insn_tmps.clear();
660			}
661next:;
662		}
663	}
664
665	void* do_translate()
666	{
667		unsigned processor;
668		switch(program.version.type)
669		{
670		case 0:
671			processor = TGSI_PROCESSOR_FRAGMENT;
672			break;
673		case 1:
674			processor = TGSI_PROCESSOR_VERTEX;
675			break;
676		case 2:
677			processor = TGSI_PROCESSOR_GEOMETRY;
678			break;
679		default:
680			fail("Tessellation and compute shaders not yet supported");
681			return 0;
682		}
683
684		if(!sm4_link_cf_insns(program))
685			fail("Malformed control flow");
686		if(!sm4_find_labels(program))
687			fail("Failed to locate labels");
688		if(!sm4_allocate_resource_sampler_pairs(program))
689			fail("Unsupported (indirect?) accesses to resources and/or samplers");
690
691		ureg = ureg_create(processor);
692
693		in_sub = false;
694
695		for(unsigned i = 0; i < program.slot_to_resource.size(); ++i)
696			samplers.push_back(ureg_DECL_sampler(ureg, i));
697
698		sm4_to_tgsi_insn_num.resize(program.insns.size());
699		for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num)
700		{
701			sm4_dcl& dcl = *program.dcls[insn_num];
702			int idx = -1;
703			if(dcl.op.get() && dcl.op->has_simple_index())
704				idx = dcl.op->indices[0].disp;
705			switch(dcl.opcode)
706			{
707			case SM4_OPCODE_DCL_GLOBAL_FLAGS:
708				break;
709			case SM4_OPCODE_DCL_TEMPS:
710				for(unsigned i = 0; i < dcl.num; ++i)
711					temps.push_back(ureg_DECL_temporary(ureg));
712				break;
713			case SM4_OPCODE_DCL_INPUT:
714				check(idx >= 0);
715				if(inputs.size() <= (unsigned)idx)
716					inputs.resize(idx + 1);
717				if(processor == TGSI_PROCESSOR_VERTEX)
718					inputs[idx] = ureg_DECL_vs_input(ureg, idx);
719				else
720					check(0);
721				break;
722			case SM4_OPCODE_DCL_INPUT_PS:
723				check(idx >= 0);
724				if(inputs.size() <= (unsigned)idx)
725					inputs.resize(idx + 1);
726				inputs[idx] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation]);
727				break;
728			case SM4_OPCODE_DCL_OUTPUT:
729				check(idx >= 0);
730				if(outputs.size() <= (unsigned)idx)
731					outputs.resize(idx + 1);
732				if(processor == TGSI_PROCESSOR_FRAGMENT)
733					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx);
734				else
735					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx);
736				break;
737			case SM4_OPCODE_DCL_INPUT_SIV:
738			case SM4_OPCODE_DCL_INPUT_SGV:
739			case SM4_OPCODE_DCL_INPUT_PS_SIV:
740			case SM4_OPCODE_DCL_INPUT_PS_SGV:
741				check(idx >= 0);
742				if(inputs.size() <= (unsigned)idx)
743					inputs.resize(idx + 1);
744				// TODO: is this correct?
745				inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0);
746				break;
747			case SM4_OPCODE_DCL_OUTPUT_SIV:
748			case SM4_OPCODE_DCL_OUTPUT_SGV:
749				check(idx >= 0);
750				if(outputs.size() <= (unsigned)idx)
751					outputs.resize(idx + 1);
752				check(sm4_to_pipe_sv[dcl.sv] >= 0);
753				outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0);
754				break;
755			case SM4_OPCODE_DCL_RESOURCE:
756				check(idx >= 0);
757				if(targets.size() <= (unsigned)idx)
758					targets.resize(idx + 1);
759				switch(dcl.dcl_resource.target)
760				{
761				case SM4_TARGET_TEXTURE1D:
762					targets[idx].first = TGSI_TEXTURE_1D;
763					targets[idx].second = TGSI_TEXTURE_SHADOW1D;
764					break;
765				case SM4_TARGET_TEXTURE2D:
766					targets[idx].first = TGSI_TEXTURE_2D;
767					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
768					break;
769				case SM4_TARGET_TEXTURE3D:
770					targets[idx].first = TGSI_TEXTURE_3D;
771					targets[idx].second = 0;
772					break;
773				case SM4_TARGET_TEXTURECUBE:
774					targets[idx].first = TGSI_TEXTURE_CUBE;
775					targets[idx].second = 0;
776					break;
777				default:
778					check(0);
779				}
780				break;
781			case SM4_OPCODE_DCL_SAMPLER:
782				check(idx >= 0);
783				if(sampler_modes.size() <= (unsigned)idx)
784					sampler_modes.resize(idx + 1);
785				check(!dcl.dcl_sampler.mono);
786				sampler_modes[idx] = dcl.dcl_sampler.shadow;
787				break;
788			case SM4_OPCODE_DCL_CONSTANT_BUFFER:
789				check(dcl.op->num_indices == 2);
790				check(dcl.op->is_index_simple(0));
791				check(dcl.op->is_index_simple(1));
792				idx = dcl.op->indices[0].disp;
793				ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx);
794				break;
795			default:
796				check(0);
797			}
798		}
799
800		translate_insns(0, program.insns.size());
801		sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg));
802		if(in_sub)
803			ureg_ENDSUB(ureg);
804		else
805			ureg_END(ureg);
806
807		for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i)
808			ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]);
809
810		const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0);
811		ureg_destroy(ureg);
812		return (void*)tokens;
813	}
814
815	void* translate()
816	{
817		try
818		{
819			return do_translate();
820		}
821		catch(const char*)
822		{
823			return 0;
824		}
825	}
826};
827
828void* sm4_to_tgsi(struct sm4_program& program)
829{
830	sm4_to_tgsi_converter conv(program);
831	return conv.translate();
832}
833