sm4_to_tgsi.cpp revision db6f1d0436b66435bac5e2b6db5d2f4e07e80473
1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27#include "d3d1xstutil.h"
28#include "sm4.h"
29#include "tgsi/tgsi_ureg.h"
30#include <vector>
31
/*
 * Internal consistency checks used throughout the translator.
 *
 * With the "#if 1" branch active, both macros map onto assert(), so they
 * expand to nothing when NDEBUG is defined and execution simply continues
 * past a failed check.  The disabled branch instead throws a string literal
 * (a const char*): the stringified condition for check(), the message for
 * fail().
 */
#if 1
#define check(x) assert(x)
#define fail(x) assert(0 && (x))
#else
#define check(x) do {if(!(x)) throw(#x);} while(0)
#define fail(x) throw(x)
#endif
39
/* TGSI description of how a pixel shader input is interpolated. */
struct tgsi_interpolation
{
	unsigned interpolation; // one of the TGSI_INTERPOLATE_* values
	bool centroid; // true to request centroid sampling for the input
};
45
46static tgsi_interpolation sm4_to_pipe_interpolation[] =
47{
48	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */
49	{TGSI_INTERPOLATE_CONSTANT, false},
50	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */
51	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */
52	{TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */
53	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */
54
55	// Added in D3D10.1
56	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */
57	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */
58};
59
60static int sm4_to_pipe_sv[] =
61{
62	-1,
63	TGSI_SEMANTIC_POSITION,
64	-1, /*TGSI_SEMANTIC_CLIP_DISTANCE */
65	-1, /*TGSI_SEMANTIC_CULL_DISTANCE */
66	-1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */
67	-1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */
68	-1, /*TGSI_SEMANTIC_VERTEXID,*/
69	TGSI_SEMANTIC_PRIMID,
70	TGSI_SEMANTIC_INSTANCEID,
71	TGSI_SEMANTIC_FACE,
72	-1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/
73};
74
75struct sm4_to_tgsi_converter
76{
77	struct ureg_program* ureg;
78	std::vector<struct ureg_dst> temps;
79	std::vector<struct ureg_dst> outputs;
80	std::vector<struct ureg_src> inputs;
81	std::vector<struct ureg_src> samplers;
82	std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison
83	std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison
84	std::vector<std::pair<unsigned, unsigned> > loops;
85	sm4_insn* insn;
86	struct sm4_program& program;
87	std::vector<unsigned> sm4_to_tgsi_insn_num;
88	std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num;
89	bool in_sub;
90	bool avoid_txf;
91	bool avoid_int;
92
93	sm4_to_tgsi_converter(struct sm4_program& program)
94	: program(program)
95	{
96		avoid_txf = true;
97		avoid_int = false;
98	}
99
100	struct ureg_dst _reg(sm4_op& op)
101	{
102		switch(op.file)
103		{
104		case SM4_FILE_NULL:
105		{
106			struct ureg_dst d;
107			memset(&d, 0, sizeof(d));
108			d.File = TGSI_FILE_NULL;
109			return d;
110		}
111		case SM4_FILE_TEMP:
112			check(op.has_simple_index());
113			check(op.indices[0].disp < temps.size());
114			return temps[op.indices[0].disp];
115		case SM4_FILE_OUTPUT:
116			check(op.has_simple_index());
117			check(op.indices[0].disp < outputs.size());
118			return outputs[op.indices[0].disp];
119		default:
120			check(0);
121			return ureg_dst_undef();
122		}
123	}
124
125	struct ureg_dst _dst(unsigned i = 0)
126	{
127		check(i < insn->num_ops);
128		sm4_op& op = *insn->ops[i];
129		check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR);
130		struct ureg_dst d = ureg_writemask(_reg(op), op.mask);
131		if(insn->insn.sat)
132			d = ureg_saturate(d);
133		return d;
134	}
135
136	struct ureg_src _src(unsigned i)
137	{
138		check(i < insn->num_ops);
139		sm4_op& op = *insn->ops[i];
140		struct ureg_src s;
141		switch(op.file)
142		{
143		case SM4_FILE_IMMEDIATE32:
144			s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32);
145			break;
146		case SM4_FILE_INPUT:
147			check(op.is_index_simple(0));
148			check(op.num_indices == 1 || op.num_indices == 2);
149			// TODO: is this correct, or are incorrectly swapping the two indices in the GS case?
150			check(op.indices[op.num_indices - 1].disp < inputs.size());
151			s = inputs[op.indices[op.num_indices - 1].disp];
152			if(op.num_indices == 2)
153			{
154				s.Dimension = 1;
155				s.DimensionIndex = op.indices[0].disp;
156			}
157			break;
158		case SM4_FILE_CONSTANT_BUFFER:
159			// TODO: indirect addressing
160			check(op.num_indices == 2);
161			check(op.is_index_simple(0));
162			check(op.is_index_simple(1));
163			s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp);
164			s.Dimension = 1;
165			s.DimensionIndex = op.indices[0].disp;
166			break;
167		default:
168			s = ureg_src(_reg(op));
169			break;
170		}
171		if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR)
172			s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]);
173		else
174		{
175			/* immediates are masked to show needed values */
176			check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64);
177		}
178		if(op.abs)
179			s = ureg_abs(s);
180		if(op.neg)
181			s = ureg_negate(s);
182		return s;
183	};
184
185	int _idx(sm4_file file, unsigned i = 0)
186	{
187		check(i < insn->num_ops);
188		sm4_op& op = *insn->ops[i];
189		check(op.file == file);
190		check(op.has_simple_index());
191		return (int)op.indices[0].disp;
192	}
193
194	int _texslot(bool have_sampler = true)
195	{
196		std::map<std::pair<int, int>, int>::iterator i;
197		i = program.resource_sampler_to_slot.find(std::make_pair(_idx(SM4_FILE_RESOURCE, 2), have_sampler ? _idx(SM4_FILE_SAMPLER, 3) : -1));
198		check(i != program.resource_sampler_to_slot.end());
199		return i->second;
200	}
201
202	unsigned tex_target(unsigned texslot)
203	{
204		unsigned mode = sampler_modes[program.slot_to_sampler[texslot]];
205		unsigned target;
206		if(mode)
207			target = targets[program.slot_to_resource[texslot]].second;
208		else
209			target = targets[program.slot_to_resource[texslot]].first;
210		check(target);
211		return target;
212	}
213
214	std::vector<struct ureg_dst> insn_tmps;
215
216	struct ureg_dst _tmp()
217	{
218		struct ureg_dst t = ureg_DECL_temporary(ureg);
219		insn_tmps.push_back(t);
220		return t;
221	}
222
223	struct ureg_dst _tmp(struct ureg_dst d)
224	{
225		if(d.File == TGSI_FILE_TEMPORARY)
226			return d;
227		else
228			return ureg_writemask(_tmp(), d.WriteMask);
229	}
230
231#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break
232#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break
233#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break
234#define OP1(n) OP1_(n, n)
235#define OP2(n) OP2_(n, n)
236#define OP3(n) OP3_(n, n)
237#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break;
238
239	void translate_insns(unsigned begin, unsigned end)
240	{
241		for(unsigned insn_num = begin; insn_num < end; ++insn_num)
242		{
243			sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg);
244			unsigned label;
245			insn = program.insns[insn_num];
246			bool ok;
247			ok = true;
248			switch(insn->opcode)
249			{
250			// trivial instructions
251			case SM4_OPCODE_NOP:
252				break;
253			OP1(MOV);
254
255			// float
256			OP2(ADD);
257			OP2(MUL);
258			OP3(MAD);
259			OP2(DIV);
260			OP1(FRC);
261			OP1(RCP);
262			OP2(MIN);
263			OP2(MAX);
264			OP2_(LT, SLT);
265			OP2_(GE, SGE);
266			OP2_(EQ, SEQ);
267			OP2_(NE, SNE);
268
269			// bitwise
270			OP1(NOT);
271			OP2(AND);
272			OP2(OR);
273			OP2(XOR);
274
275			// special mathematical
276			OP2(DP2);
277			OP2(DP3);
278			OP2(DP4);
279			OP1(RSQ);
280			OP1_(LOG, LG2);
281			OP1_(EXP, EX2);
282
283			// rounding
284			OP1_(ROUND_NE, ROUND);
285			OP1_(ROUND_Z, TRUNC);
286			OP1_(ROUND_PI, CEIL);
287			OP1_(ROUND_NI, FLR);
288
289			// cross-thread
290			OP1_(DERIV_RTX, DDX);
291			OP1_(DERIV_RTX_COARSE, DDX);
292			OP1_(DERIV_RTX_FINE, DDX);
293			OP1_(DERIV_RTY, DDY);
294			OP1_(DERIV_RTY_COARSE, DDY);
295			OP1_(DERIV_RTY_FINE, DDY);
296			case SM4_OPCODE_EMIT:
297				ureg_EMIT(ureg);
298				break;
299			case SM4_OPCODE_CUT:
300				ureg_ENDPRIM(ureg);
301				break;
302			case SM4_OPCODE_EMITTHENCUT:
303				ureg_EMIT(ureg);
304				ureg_ENDPRIM(ureg);
305				break;
306
307			// non-trivial instructions
308			case SM4_OPCODE_MOVC:
309				/* CMP checks for < 0, but MOVC checks for != 0
310				 * but fortunately, x != 0 is equivalent to -abs(x) < 0
311				 * XXX: can test_nz apply to this?!
312				 */
313				ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3));
314				break;
315			case SM4_OPCODE_SQRT:
316			{
317				struct ureg_dst d = _dst();
318				struct ureg_dst t = _tmp(d);
319				ureg_RSQ(ureg, t, _src(1));
320				ureg_RCP(ureg, d, ureg_src(t));
321				break;
322			}
323			case SM4_OPCODE_SINCOS:
324			{
325				struct ureg_dst s = _dst(0);
326				struct ureg_dst c = _dst(1);
327				struct ureg_src v = _src(2);
328				if(s.File != TGSI_FILE_NULL)
329					ureg_SIN(ureg, s, v);
330				if(c.File != TGSI_FILE_NULL)
331					ureg_COS(ureg, c, v);
332				break;
333			}
334
335			// control flow
336			case SM4_OPCODE_DISCARD:
337				ureg_KIL(ureg, _src(0));
338				break;
339			OP_CF(LOOP, BGNLOOP);
340			OP_CF(ENDLOOP, ENDLOOP);
341			case SM4_OPCODE_BREAK:
342				ureg_BRK(ureg);
343				break;
344			case SM4_OPCODE_BREAKC:
345				// XXX: can test_nz apply to this?!
346				ureg_BREAKC(ureg, _src(0));
347				break;
348			case SM4_OPCODE_CONTINUE:
349				ureg_CONT(ureg);
350				break;
351			case SM4_OPCODE_CONTINUEC:
352				// XXX: can test_nz apply to this?!
353				ureg_IF(ureg, _src(0), &label);
354				ureg_CONT(ureg);
355				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
356				ureg_ENDIF(ureg);
357				break;
358			case SM4_OPCODE_SWITCH:
359				ureg_SWITCH(ureg, _src(0));
360				break;
361			case SM4_OPCODE_CASE:
362				ureg_CASE(ureg, _src(0));
363				break;
364			case SM4_OPCODE_DEFAULT:
365				ureg_DEFAULT(ureg);
366				break;
367			case SM4_OPCODE_ENDSWITCH:
368				ureg_ENDSWITCH(ureg);
369				break;
370			case SM4_OPCODE_CALL:
371				ureg_CAL(ureg, &label);
372				label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)]));
373				break;
374			case SM4_OPCODE_LABEL:
375				if(in_sub)
376					ureg_ENDSUB(ureg);
377				else
378					ureg_END(ureg);
379				ureg_BGNSUB(ureg);
380				in_sub = true;
381				break;
382			case SM4_OPCODE_RET:
383				if(in_sub || insn_num != (program.insns.size() - 1))
384					ureg_RET(ureg);
385				break;
386			case SM4_OPCODE_RETC:
387				ureg_IF(ureg, _src(0), &label);
388				if(insn->insn.test_nz)
389					ureg_RET(ureg);
390				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
391				if(!insn->insn.test_nz)
392				{
393					ureg_ELSE(ureg, &label);
394					ureg_RET(ureg);
395					ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
396				}
397				ureg_ENDIF(ureg);
398				break;
399			OP_CF(ELSE, ELSE);
400			case SM4_OPCODE_ENDIF:
401				ureg_ENDIF(ureg);
402				break;
403			case SM4_OPCODE_IF:
404				if(insn->insn.test_nz)
405				{
406					ureg_IF(ureg, _src(0), &label);
407					label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num]));
408				}
409				else
410				{
411					unsigned linked = program.cf_insn_linked[insn_num];
412					if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF)
413					{
414						ureg_IF(ureg, _src(0), &label);
415						ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
416						ureg_ELSE(ureg, &label);
417						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
418					}
419					else
420					{
421						/* we have to swap the branches in this case (fun!)
422						 * TODO: maybe just emit a SEQ 0?
423						 * */
424						unsigned endif = program.cf_insn_linked[linked];
425
426						ureg_IF(ureg, _src(0), &label);
427						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
428
429						translate_insns(linked + 1, endif);
430
431						sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg);
432						ureg_ELSE(ureg, &label);
433						label_to_sm4_insn_num.push_back(std::make_pair(label, endif));
434
435						translate_insns(insn_num + 1, linked);
436
437						insn_num = endif - 1;
438						goto next;
439					}
440				}
441				break;
442			case SM4_OPCODE_RESINFO:
443			{
444				std::map<int, int>::iterator i;
445				i = program.resource_to_slot.find(_idx(SM4_FILE_RESOURCE, 2));
446				check(i != program.resource_to_slot.end());
447				unsigned texslot = i->second;
448
449				// no driver actually provides this, unfortunately
450				ureg_TXQ(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]);
451				break;
452			};
453			// TODO: sample offset, sample index
454			case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg (ouch)
455			case SM4_OPCODE_LD_MS:
456			{
457				unsigned texslot = _texslot(false);
458				unsigned dim = 0;
459				switch(targets[texslot].first)
460				{
461				case TGSI_TEXTURE_1D:
462					dim = 1;
463					break;
464				case TGSI_TEXTURE_2D:
465				case TGSI_TEXTURE_RECT:
466					dim = 2;
467					break;
468				case TGSI_TEXTURE_3D:
469					dim = 3;
470					break;
471				default:
472					check(0);
473				}
474				struct ureg_dst tmp = _tmp();
475				if(avoid_txf)
476				{
477					struct ureg_src texcoord;
478					if(!avoid_int)
479					{
480						ureg_I2F(ureg, tmp, _src(1));
481						texcoord = ureg_src(tmp);
482					}
483					else
484						texcoord = _src(1);
485
486					ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_swizzle(texcoord, 0, 1, 2, dim), samplers[texslot]);
487				}
488				else
489					ureg_TXF(ureg, _dst(), tex_target(texslot), ureg_swizzle(_src(1), 0, 1, 2, dim), samplers[texslot]);
490				break;
491			}
492			case SM4_OPCODE_SAMPLE: // dst, coord, res, samp
493			{
494				unsigned texslot = _texslot();
495				ureg_TEX(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot]);
496				break;
497			}
498			case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x
499			{
500				unsigned texslot = _texslot();
501				struct ureg_dst tmp = _tmp();
502				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
503				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
504				ureg_TXB(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
505				break;
506			}
507			case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x
508			{
509				unsigned texslot = _texslot();
510				struct ureg_dst tmp = _tmp();
511				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1));
512				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0));
513				ureg_TEX(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
514				break;
515			}
516			case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x
517			{
518				unsigned texslot = _texslot();
519				struct ureg_dst tmp = _tmp();
520				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), _src(1));
521				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_swizzle(_src(4), 0, 0, 0, 0));
522				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0));
523				ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
524				break;
525			}
526			case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy
527			{
528				unsigned texslot = _texslot();
529				ureg_TXD(ureg, _dst(), tex_target(texslot), _src(1), samplers[texslot], _src(4), _src(5));
530				break;
531			}
532			case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x
533			{
534				unsigned texslot = _texslot();
535				struct ureg_dst tmp = _tmp();
536				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
537				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
538				ureg_TXL(ureg, _dst(), tex_target(texslot), ureg_src(tmp), samplers[texslot]);
539				break;
540			}
541			default:
542				ok = false;
543				break;
544			}
545
546			if(!ok && !avoid_int)
547			{
548				ok = true;
549				switch(insn->opcode)
550				{
551				// integer
552				OP1_(ITOF, I2F);
553				OP1_(FTOI, F2I);
554				OP2_(IADD, UADD);
555				OP1(INEG);
556				OP2_(IMUL, UMUL);
557				OP3_(IMAD, UMAD);
558				OP2_(ISHL, SHL);
559				OP2_(ISHR, ISHR);
560				OP2(IMIN);
561				OP2(IMAX);
562				OP2_(ILT, ISLT);
563				OP2_(IGE, ISGE);
564				OP2_(IEQ, USEQ);
565				OP2_(INE, USNE);
566
567				// unsigned
568				OP1_(UTOF, U2F);
569				OP1_(FTOU, F2U);
570				OP2(UMUL);
571				OP3(UMAD);
572				OP2(UMIN);
573				OP2(UMAX);
574				OP2_(ULT, USLT);
575				OP2_(UGE, USGE);
576				OP2(USHR);
577
578				case SM4_OPCODE_UDIV:
579				{
580					struct ureg_dst q = _dst(0);
581					struct ureg_dst r = _dst(1);
582					struct ureg_src a = _src(2);
583					struct ureg_src b = _src(3);
584					if(q.File != TGSI_FILE_NULL)
585						ureg_UDIV(ureg, q, a, b);
586					if(r.File != TGSI_FILE_NULL)
587						ureg_UMOD(ureg, r, a, b);
588					break;
589				}
590				default:
591					ok = false;
592				}
593			}
594
595			if(!ok && avoid_int)
596			{
597				ok = true;
598				switch(insn->opcode)
599				{
600				case SM4_OPCODE_ITOF:
601				case SM4_OPCODE_UTOF:
602					break;
603				OP1_(FTOI, TRUNC);
604				OP1_(FTOU, FLR);
605				// integer
606				OP2_(IADD, ADD);
607				OP2_(IMUL, MUL);
608				OP3_(IMAD, MAD);
609				OP2_(MIN, MIN);
610				OP2_(MAX, MAX);
611				OP2_(ILT, SLT);
612				OP2_(IGE, SGE);
613				OP2_(IEQ, SEQ);
614				OP2_(INE, SNE);
615
616				// unsigned
617				OP2_(UMUL, MUL);
618				OP3_(UMAD, MAD);
619				OP2_(UMIN, MIN);
620				OP2_(UMAX, MAX);
621				OP2_(ULT, SLT);
622				OP2_(UGE, SGE);
623
624				case SM4_OPCODE_INEG:
625					ureg_MOV(ureg, _dst(), ureg_negate(_src(1)));
626					break;
627				case SM4_OPCODE_ISHL:
628				{
629					struct ureg_dst d = _dst();
630					struct ureg_dst t = _tmp(d);
631					ureg_EX2(ureg, t, _src(2));
632					ureg_MUL(ureg, d, ureg_src(t), _src(1));
633					break;
634				}
635				case SM4_OPCODE_ISHR:
636				case SM4_OPCODE_USHR:
637				{
638					struct ureg_dst d = _dst();
639					struct ureg_dst t = _tmp(d);
640					ureg_EX2(ureg, t, ureg_negate(_src(2)));
641					ureg_MUL(ureg, t, ureg_src(t), _src(1));
642					ureg_FLR(ureg, d, ureg_src(t));
643					break;
644				}
645				case SM4_OPCODE_UDIV:
646				{
647					struct ureg_dst q = _dst(0);
648					struct ureg_dst r = _dst(1);
649					struct ureg_src a = _src(2);
650					struct ureg_src b = _src(3);
651					struct ureg_dst f = _tmp();
652					ureg_DIV(ureg, f, a, b);
653					if(q.File != TGSI_FILE_NULL)
654						ureg_FLR(ureg, q, ureg_src(f));
655					if(r.File != TGSI_FILE_NULL)
656					{
657						ureg_FRC(ureg, f, ureg_src(f));
658						ureg_MUL(ureg, r, ureg_src(f), b);
659					}
660					break;
661				}
662				default:
663					ok = false;
664				}
665			}
666
667			check(ok);
668
669			if(!insn_tmps.empty())
670			{
671				for(unsigned i = 0; i < insn_tmps.size(); ++i)
672					ureg_release_temporary(ureg, insn_tmps[i]);
673				insn_tmps.clear();
674			}
675next:;
676		}
677	}
678
679	void* do_translate()
680	{
681		unsigned processor;
682		switch(program.version.type)
683		{
684		case 0:
685			processor = TGSI_PROCESSOR_FRAGMENT;
686			break;
687		case 1:
688			processor = TGSI_PROCESSOR_VERTEX;
689			break;
690		case 2:
691			processor = TGSI_PROCESSOR_GEOMETRY;
692			break;
693		default:
694			fail("Tessellation and compute shaders not yet supported");
695			return 0;
696		}
697
698		if(!sm4_link_cf_insns(program))
699			fail("Malformed control flow");
700		if(!sm4_find_labels(program))
701			fail("Failed to locate labels");
702		if(!sm4_allocate_resource_sampler_pairs(program))
703			fail("Unsupported (indirect?) accesses to resources and/or samplers");
704
705		ureg = ureg_create(processor);
706
707		in_sub = false;
708
709		for(unsigned i = 0; i < program.slot_to_resource.size(); ++i)
710			samplers.push_back(ureg_DECL_sampler(ureg, i));
711
712		sm4_to_tgsi_insn_num.resize(program.insns.size());
713		for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num)
714		{
715			sm4_dcl& dcl = *program.dcls[insn_num];
716			int idx = -1;
717			if(dcl.op.get() && dcl.op->is_index_simple(0))
718				idx = dcl.op->indices[0].disp;
719			switch(dcl.opcode)
720			{
721			case SM4_OPCODE_DCL_GLOBAL_FLAGS:
722				break;
723			case SM4_OPCODE_DCL_TEMPS:
724				for(unsigned i = 0; i < dcl.num; ++i)
725					temps.push_back(ureg_DECL_temporary(ureg));
726				break;
727			case SM4_OPCODE_DCL_INPUT:
728				check(idx >= 0);
729				if(processor == TGSI_PROCESSOR_VERTEX)
730				{
731					if(inputs.size() <= (unsigned)idx)
732						inputs.resize(idx + 1);
733					inputs[idx] = ureg_DECL_vs_input(ureg, idx);
734				}
735				else if(processor == TGSI_PROCESSOR_GEOMETRY)
736				{
737					// TODO: is this correct?
738					unsigned gsidx = dcl.op->indices[1].disp;
739					if(inputs.size() <= (unsigned)gsidx)
740						inputs.resize(gsidx + 1);
741					inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx);
742				}
743				else
744					check(0);
745				break;
746			case SM4_OPCODE_DCL_INPUT_PS:
747				check(idx >= 0);
748				if(inputs.size() <= (unsigned)idx)
749					inputs.resize(idx + 1);
750				inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid);
751				break;
752			case SM4_OPCODE_DCL_OUTPUT:
753				check(idx >= 0);
754				if(outputs.size() <= (unsigned)idx)
755					outputs.resize(idx + 1);
756				if(processor == TGSI_PROCESSOR_FRAGMENT)
757					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx);
758				else
759					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx);
760				break;
761			case SM4_OPCODE_DCL_INPUT_SIV:
762			case SM4_OPCODE_DCL_INPUT_SGV:
763			case SM4_OPCODE_DCL_INPUT_PS_SIV:
764			case SM4_OPCODE_DCL_INPUT_PS_SGV:
765				check(idx >= 0);
766				if(inputs.size() <= (unsigned)idx)
767					inputs.resize(idx + 1);
768				// TODO: is this correct?
769				inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0);
770				break;
771			case SM4_OPCODE_DCL_OUTPUT_SIV:
772			case SM4_OPCODE_DCL_OUTPUT_SGV:
773				check(idx >= 0);
774				if(outputs.size() <= (unsigned)idx)
775					outputs.resize(idx + 1);
776				check(sm4_to_pipe_sv[dcl.sv] >= 0);
777				outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0);
778				break;
779			case SM4_OPCODE_DCL_RESOURCE:
780				check(idx >= 0);
781				if(targets.size() <= (unsigned)idx)
782					targets.resize(idx + 1);
783				switch(dcl.dcl_resource.target)
784				{
785				case SM4_TARGET_TEXTURE1D:
786					targets[idx].first = TGSI_TEXTURE_1D;
787					targets[idx].second = TGSI_TEXTURE_SHADOW1D;
788					break;
789				case SM4_TARGET_TEXTURE2D:
790					targets[idx].first = TGSI_TEXTURE_2D;
791					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
792					break;
793				case SM4_TARGET_TEXTURE3D:
794					targets[idx].first = TGSI_TEXTURE_3D;
795					targets[idx].second = 0;
796					break;
797				case SM4_TARGET_TEXTURECUBE:
798					targets[idx].first = TGSI_TEXTURE_CUBE;
799					targets[idx].second = 0;
800					break;
801				default:
802					// HACK to make SimpleSample10 work
803					//check(0);
804					targets[idx].first = TGSI_TEXTURE_2D;
805					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
806					break;
807				}
808				break;
809			case SM4_OPCODE_DCL_SAMPLER:
810				check(idx >= 0);
811				if(sampler_modes.size() <= (unsigned)idx)
812					sampler_modes.resize(idx + 1);
813				check(!dcl.dcl_sampler.mono);
814				sampler_modes[idx] = dcl.dcl_sampler.shadow;
815				break;
816			case SM4_OPCODE_DCL_CONSTANT_BUFFER:
817				check(dcl.op->num_indices == 2);
818				check(dcl.op->is_index_simple(0));
819				check(dcl.op->is_index_simple(1));
820				idx = dcl.op->indices[0].disp;
821				ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx);
822				break;
823			case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
824				ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]);
825				break;
826			case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
827				ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]);
828				break;
829			case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
830				ureg_property_gs_max_vertices(ureg, dcl.num);
831				break;
832			default:
833				check(0);
834			}
835		}
836
837		translate_insns(0, program.insns.size());
838		sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg));
839		if(in_sub)
840			ureg_ENDSUB(ureg);
841		else
842			ureg_END(ureg);
843
844		for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i)
845			ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]);
846
847		const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0);
848		ureg_destroy(ureg);
849		return (void*)tokens;
850	}
851
852	void* translate()
853	{
854		try
855		{
856			return do_translate();
857		}
858		catch(const char*)
859		{
860			return 0;
861		}
862	}
863};
864
865void* sm4_to_tgsi(struct sm4_program& program)
866{
867	sm4_to_tgsi_converter conv(program);
868	return conv.translate();
869}
870