1/**************************************************************************
2 *
3 * Copyright 2010 Luca Barbieri
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27#include <d3d11shader.h>
28#include "d3d1xstutil.h"
29#include "sm4.h"
30#include "tgsi/tgsi_ureg.h"
31#include <vector>
32
/* Error-reporting helpers used throughout the translator.
 *
 * Active configuration (#if 1): both map onto assert(), so a violated
 * condition aborts when assertions are enabled and compiles to nothing
 * when they are disabled.
 *
 * Disabled alternative: throw a string literal (const char*), which
 * sm4_to_tgsi_converter::translate() catches and turns into a null result.
 */
#if 1
#define check(x) assert(x)
#define fail(x) assert(0 && (x))
#else
#define check(x) do {if(!(x)) throw(#x);} while(0)
#define fail(x) throw(x)
#endif
40
41struct tgsi_interpolation
42{
43	unsigned interpolation;
44	bool centroid;
45};
46
47static tgsi_interpolation sm4_to_pipe_interpolation[] =
48{
49	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */
50	{TGSI_INTERPOLATE_CONSTANT, false},
51	{TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */
52	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */
53	{TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */
54	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */
55
56	// Added in D3D10.1
57	{TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */
58	{TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */
59};
60
61static int sm4_to_pipe_sv[] =
62{
63	-1,
64	TGSI_SEMANTIC_POSITION,
65	-1, /*TGSI_SEMANTIC_CLIP_DISTANCE */
66	-1, /*TGSI_SEMANTIC_CULL_DISTANCE */
67	-1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */
68	-1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */
69	-1, /*TGSI_SEMANTIC_VERTEXID,*/
70	TGSI_SEMANTIC_PRIMID,
71	TGSI_SEMANTIC_INSTANCEID,
72	TGSI_SEMANTIC_FACE,
73	-1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/
74};
75
76struct sm4_to_tgsi_converter
77{
78	struct ureg_program* ureg;
79	std::vector<struct ureg_dst> temps;
80	std::vector<struct ureg_dst> outputs;
81	std::vector<struct ureg_src> inputs;
82	std::vector<struct ureg_src> resources;
83	std::vector<struct ureg_src> samplers;
84	std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison
85	std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison
86	std::vector<std::pair<unsigned, unsigned> > loops;
87	sm4_insn* insn;
88	struct sm4_program& program;
89	std::vector<unsigned> sm4_to_tgsi_insn_num;
90	std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num;
91	bool in_sub;
92	bool avoid_txf;
93	bool avoid_int;
94
95	sm4_to_tgsi_converter(struct sm4_program& program)
96	: program(program)
97	{
98		avoid_txf = true;
99		avoid_int = false;
100	}
101
102	struct ureg_dst _reg(sm4_op& op)
103	{
104		switch(op.file)
105		{
106		case SM4_FILE_NULL:
107		{
108			struct ureg_dst d;
109			memset(&d, 0, sizeof(d));
110			d.File = TGSI_FILE_NULL;
111			return d;
112		}
113		case SM4_FILE_TEMP:
114			check(op.has_simple_index());
115			check(op.indices[0].disp < temps.size());
116			return temps[op.indices[0].disp];
117		case SM4_FILE_OUTPUT:
118			check(op.has_simple_index());
119			check(op.indices[0].disp < outputs.size());
120			return outputs[op.indices[0].disp];
121		default:
122			check(0);
123			return ureg_dst_undef();
124		}
125	}
126
127	struct ureg_dst _dst(unsigned i = 0)
128	{
129		check(i < insn->num_ops);
130		sm4_op& op = *insn->ops[i];
131		check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR);
132		struct ureg_dst d = ureg_writemask(_reg(op), op.mask);
133		if(insn->insn.sat)
134			d = ureg_saturate(d);
135		return d;
136	}
137
138	struct ureg_src _src(unsigned i)
139	{
140		check(i < insn->num_ops);
141		sm4_op& op = *insn->ops[i];
142		struct ureg_src s;
143		switch(op.file)
144		{
145		case SM4_FILE_IMMEDIATE32:
146			s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32);
147			break;
148		case SM4_FILE_INPUT:
149			check(op.is_index_simple(0));
150			check(op.num_indices == 1 || op.num_indices == 2);
151			// TODO: is this correct, or are incorrectly swapping the two indices in the GS case?
152			check(op.indices[op.num_indices - 1].disp < inputs.size());
153			s = inputs[op.indices[op.num_indices - 1].disp];
154			if(op.num_indices == 2)
155			{
156				s.Dimension = 1;
157				s.DimensionIndex = op.indices[0].disp;
158			}
159			break;
160		case SM4_FILE_CONSTANT_BUFFER:
161			// TODO: indirect addressing
162			check(op.num_indices == 2);
163			check(op.is_index_simple(0));
164			check(op.is_index_simple(1));
165			s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp);
166			s.Dimension = 1;
167			s.DimensionIndex = op.indices[0].disp;
168			break;
169		default:
170			s = ureg_src(_reg(op));
171			break;
172		}
173		if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR)
174			s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]);
175		else
176		{
177			/* immediates are masked to show needed values */
178			check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64);
179		}
180		if(op.abs)
181			s = ureg_abs(s);
182		if(op.neg)
183			s = ureg_negate(s);
184		return s;
185	};
186
187	int _idx(sm4_file file, unsigned i = 0)
188	{
189		check(i < insn->num_ops);
190		sm4_op& op = *insn->ops[i];
191		check(op.file == file);
192		check(op.has_simple_index());
193		return (int)op.indices[0].disp;
194	}
195
196	unsigned tex_target(unsigned resource, unsigned sampler)
197	{
198		unsigned shadow = sampler_modes[sampler];
199		unsigned target = shadow ? targets[resource].second : targets[resource].first;
200		check(target);
201		return target;
202	}
203
204	enum pipe_type res_return_type(unsigned type)
205	{
206		switch(type)
207		{
208		case D3D_RETURN_TYPE_UNORM: return PIPE_TYPE_UNORM;
209		case D3D_RETURN_TYPE_SNORM: return PIPE_TYPE_SNORM;
210		case D3D_RETURN_TYPE_SINT:  return PIPE_TYPE_SINT;
211		case D3D_RETURN_TYPE_UINT:  return PIPE_TYPE_UINT;
212		case D3D_RETURN_TYPE_FLOAT: return PIPE_TYPE_FLOAT;
213		default:
214			fail("invalid resource return type");
215			return PIPE_TYPE_FLOAT;
216		}
217	}
218
219	std::vector<struct ureg_dst> insn_tmps;
220
221	struct ureg_dst _tmp()
222	{
223		struct ureg_dst t = ureg_DECL_temporary(ureg);
224		insn_tmps.push_back(t);
225		return t;
226	}
227
228	struct ureg_dst _tmp(struct ureg_dst d)
229	{
230		if(d.File == TGSI_FILE_TEMPORARY)
231			return d;
232		else
233			return ureg_writemask(_tmp(), d.WriteMask);
234	}
235
236#define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break
237#define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break
238#define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break
239#define OP1(n) OP1_(n, n)
240#define OP2(n) OP2_(n, n)
241#define OP3(n) OP3_(n, n)
242#define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break;
243
244	void translate_insns(unsigned begin, unsigned end)
245	{
246		for(unsigned insn_num = begin; insn_num < end; ++insn_num)
247		{
248			sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg);
249			unsigned label;
250			insn = program.insns[insn_num];
251			bool ok;
252			ok = true;
253			switch(insn->opcode)
254			{
255			// trivial instructions
256			case SM4_OPCODE_NOP:
257				break;
258			OP1(MOV);
259
260			// float
261			OP2(ADD);
262			OP2(MUL);
263			OP3(MAD);
264			OP2(DIV);
265			OP1(FRC);
266			OP1(RCP);
267			OP2(MIN);
268			OP2(MAX);
269			OP2_(LT, SLT);
270			OP2_(GE, SGE);
271			OP2_(EQ, SEQ);
272			OP2_(NE, SNE);
273
274			// bitwise
275			OP1(NOT);
276			OP2(AND);
277			OP2(OR);
278			OP2(XOR);
279
280			// special mathematical
281			OP2(DP2);
282			OP2(DP3);
283			OP2(DP4);
284			OP1(RSQ);
285			OP1_(LOG, LG2);
286			OP1_(EXP, EX2);
287
288			// rounding
289			OP1_(ROUND_NE, ROUND);
290			OP1_(ROUND_Z, TRUNC);
291			OP1_(ROUND_PI, CEIL);
292			OP1_(ROUND_NI, FLR);
293
294			// cross-thread
295			OP1_(DERIV_RTX, DDX);
296			OP1_(DERIV_RTX_COARSE, DDX);
297			OP1_(DERIV_RTX_FINE, DDX);
298			OP1_(DERIV_RTY, DDY);
299			OP1_(DERIV_RTY_COARSE, DDY);
300			OP1_(DERIV_RTY_FINE, DDY);
301			case SM4_OPCODE_EMIT:
302				ureg_EMIT(ureg);
303				break;
304			case SM4_OPCODE_CUT:
305				ureg_ENDPRIM(ureg);
306				break;
307			case SM4_OPCODE_EMITTHENCUT:
308				ureg_EMIT(ureg);
309				ureg_ENDPRIM(ureg);
310				break;
311
312			// non-trivial instructions
313			case SM4_OPCODE_MOVC:
314				/* CMP checks for < 0, but MOVC checks for != 0
315				 * but fortunately, x != 0 is equivalent to -abs(x) < 0
316				 * XXX: can test_nz apply to this?!
317				 */
318				ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3));
319				break;
320			case SM4_OPCODE_SQRT:
321			{
322				struct ureg_dst d = _dst();
323				struct ureg_dst t = _tmp(d);
324				ureg_RSQ(ureg, t, _src(1));
325				ureg_RCP(ureg, d, ureg_src(t));
326				break;
327			}
328			case SM4_OPCODE_SINCOS:
329			{
330				struct ureg_dst s = _dst(0);
331				struct ureg_dst c = _dst(1);
332				struct ureg_src v = _src(2);
333				if(s.File != TGSI_FILE_NULL)
334					ureg_SIN(ureg, s, v);
335				if(c.File != TGSI_FILE_NULL)
336					ureg_COS(ureg, c, v);
337				break;
338			}
339
340			// control flow
341			case SM4_OPCODE_DISCARD:
342				ureg_KIL(ureg, _src(0));
343				break;
344			OP_CF(LOOP, BGNLOOP);
345			OP_CF(ENDLOOP, ENDLOOP);
346			case SM4_OPCODE_BREAK:
347				ureg_BRK(ureg);
348				break;
349			case SM4_OPCODE_BREAKC:
350				// XXX: can test_nz apply to this?!
351				ureg_BREAKC(ureg, _src(0));
352				break;
353			case SM4_OPCODE_CONTINUE:
354				ureg_CONT(ureg);
355				break;
356			case SM4_OPCODE_CONTINUEC:
357				// XXX: can test_nz apply to this?!
358				ureg_IF(ureg, _src(0), &label);
359				ureg_CONT(ureg);
360				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
361				ureg_ENDIF(ureg);
362				break;
363			case SM4_OPCODE_SWITCH:
364				ureg_SWITCH(ureg, _src(0));
365				break;
366			case SM4_OPCODE_CASE:
367				ureg_CASE(ureg, _src(0));
368				break;
369			case SM4_OPCODE_DEFAULT:
370				ureg_DEFAULT(ureg);
371				break;
372			case SM4_OPCODE_ENDSWITCH:
373				ureg_ENDSWITCH(ureg);
374				break;
375			case SM4_OPCODE_CALL:
376				ureg_CAL(ureg, &label);
377				label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)]));
378				break;
379			case SM4_OPCODE_LABEL:
380				if(in_sub)
381					ureg_ENDSUB(ureg);
382				else
383					ureg_END(ureg);
384				ureg_BGNSUB(ureg);
385				in_sub = true;
386				break;
387			case SM4_OPCODE_RET:
388				if(in_sub || insn_num != (program.insns.size() - 1))
389					ureg_RET(ureg);
390				break;
391			case SM4_OPCODE_RETC:
392				ureg_IF(ureg, _src(0), &label);
393				if(insn->insn.test_nz)
394					ureg_RET(ureg);
395				ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
396				if(!insn->insn.test_nz)
397				{
398					ureg_ELSE(ureg, &label);
399					ureg_RET(ureg);
400					ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
401				}
402				ureg_ENDIF(ureg);
403				break;
404			OP_CF(ELSE, ELSE);
405			case SM4_OPCODE_ENDIF:
406				ureg_ENDIF(ureg);
407				break;
408			case SM4_OPCODE_IF:
409				if(insn->insn.test_nz)
410				{
411					ureg_IF(ureg, _src(0), &label);
412					label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num]));
413				}
414				else
415				{
416					unsigned linked = program.cf_insn_linked[insn_num];
417					if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF)
418					{
419						ureg_IF(ureg, _src(0), &label);
420						ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg));
421						ureg_ELSE(ureg, &label);
422						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
423					}
424					else
425					{
426						/* we have to swap the branches in this case (fun!)
427						 * TODO: maybe just emit a SEQ 0?
428						 * */
429						unsigned endif = program.cf_insn_linked[linked];
430
431						ureg_IF(ureg, _src(0), &label);
432						label_to_sm4_insn_num.push_back(std::make_pair(label, linked));
433
434						translate_insns(linked + 1, endif);
435
436						sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg);
437						ureg_ELSE(ureg, &label);
438						label_to_sm4_insn_num.push_back(std::make_pair(label, endif));
439
440						translate_insns(insn_num + 1, linked);
441
442						insn_num = endif - 1;
443						goto next;
444					}
445				}
446				break;
447			case SM4_OPCODE_RESINFO:
448				// TODO: return type
449				ureg_SVIEWINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
450				break;
451			// TODO: sample index, texture offset
452			case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg
453				ureg_LOAD(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
454				break;
455			case SM4_OPCODE_LD_MS:
456				ureg_LOAD_MS(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
457				break;
458			case SM4_OPCODE_SAMPLE: // dst, coord, res, samp
459				ureg_SAMPLE(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
460				break;
461			case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x
462				ureg_SAMPLE_B(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
463				break;
464			case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x
465				ureg_SAMPLE_C(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
466				break;
467			case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x
468				ureg_SAMPLE_C_LZ(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4));
469				break;
470			case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy
471				ureg_SAMPLE_D(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4), _src(5));
472				break;
473			case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x
474			{
475				struct ureg_dst tmp = _tmp();
476				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1));
477				ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0));
478				ureg_SAMPLE_L(ureg, _dst(), ureg_src(tmp), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]);
479				break;
480			}
481			default:
482				ok = false;
483				break;
484			}
485
486			if(!ok && !avoid_int)
487			{
488				ok = true;
489				switch(insn->opcode)
490				{
491				// integer
492				OP1_(ITOF, I2F);
493				OP1_(FTOI, F2I);
494				OP2_(IADD, UADD);
495				OP1(INEG);
496				OP2_(IMUL, UMUL);
497				OP3_(IMAD, UMAD);
498				OP2_(ISHL, SHL);
499				OP2_(ISHR, ISHR);
500				OP2(IMIN);
501				OP2(IMAX);
502				OP2_(ILT, ISLT);
503				OP2_(IGE, ISGE);
504				OP2_(IEQ, USEQ);
505				OP2_(INE, USNE);
506
507				// unsigned
508				OP1_(UTOF, U2F);
509				OP1_(FTOU, F2U);
510				OP2(UMUL);
511				OP3(UMAD);
512				OP2(UMIN);
513				OP2(UMAX);
514				OP2_(ULT, USLT);
515				OP2_(UGE, USGE);
516				OP2(USHR);
517
518				case SM4_OPCODE_UDIV:
519				{
520					struct ureg_dst q = _dst(0);
521					struct ureg_dst r = _dst(1);
522					struct ureg_src a = _src(2);
523					struct ureg_src b = _src(3);
524					if(q.File != TGSI_FILE_NULL)
525						ureg_UDIV(ureg, q, a, b);
526					if(r.File != TGSI_FILE_NULL)
527						ureg_UMOD(ureg, r, a, b);
528					break;
529				}
530				default:
531					ok = false;
532				}
533			}
534
535			if(!ok && avoid_int)
536			{
537				ok = true;
538				switch(insn->opcode)
539				{
540				case SM4_OPCODE_ITOF:
541				case SM4_OPCODE_UTOF:
542					break;
543				OP1_(FTOI, TRUNC);
544				OP1_(FTOU, FLR);
545				// integer
546				OP2_(IADD, ADD);
547				OP2_(IMUL, MUL);
548				OP3_(IMAD, MAD);
549				OP2_(MIN, MIN);
550				OP2_(MAX, MAX);
551				OP2_(ILT, SLT);
552				OP2_(IGE, SGE);
553				OP2_(IEQ, SEQ);
554				OP2_(INE, SNE);
555
556				// unsigned
557				OP2_(UMUL, MUL);
558				OP3_(UMAD, MAD);
559				OP2_(UMIN, MIN);
560				OP2_(UMAX, MAX);
561				OP2_(ULT, SLT);
562				OP2_(UGE, SGE);
563
564				case SM4_OPCODE_INEG:
565					ureg_MOV(ureg, _dst(), ureg_negate(_src(1)));
566					break;
567				case SM4_OPCODE_ISHL:
568				{
569					struct ureg_dst d = _dst();
570					struct ureg_dst t = _tmp(d);
571					ureg_EX2(ureg, t, _src(2));
572					ureg_MUL(ureg, d, ureg_src(t), _src(1));
573					break;
574				}
575				case SM4_OPCODE_ISHR:
576				case SM4_OPCODE_USHR:
577				{
578					struct ureg_dst d = _dst();
579					struct ureg_dst t = _tmp(d);
580					ureg_EX2(ureg, t, ureg_negate(_src(2)));
581					ureg_MUL(ureg, t, ureg_src(t), _src(1));
582					ureg_FLR(ureg, d, ureg_src(t));
583					break;
584				}
585				case SM4_OPCODE_UDIV:
586				{
587					struct ureg_dst q = _dst(0);
588					struct ureg_dst r = _dst(1);
589					struct ureg_src a = _src(2);
590					struct ureg_src b = _src(3);
591					struct ureg_dst f = _tmp();
592					ureg_DIV(ureg, f, a, b);
593					if(q.File != TGSI_FILE_NULL)
594						ureg_FLR(ureg, q, ureg_src(f));
595					if(r.File != TGSI_FILE_NULL)
596					{
597						ureg_FRC(ureg, f, ureg_src(f));
598						ureg_MUL(ureg, r, ureg_src(f), b);
599					}
600					break;
601				}
602				default:
603					ok = false;
604				}
605			}
606
607			check(ok);
608
609			if(!insn_tmps.empty())
610			{
611				for(unsigned i = 0; i < insn_tmps.size(); ++i)
612					ureg_release_temporary(ureg, insn_tmps[i]);
613				insn_tmps.clear();
614			}
615next:;
616		}
617	}
618
619	void* do_translate()
620	{
621		unsigned processor;
622		switch(program.version.type)
623		{
624		case 0:
625			processor = TGSI_PROCESSOR_FRAGMENT;
626			break;
627		case 1:
628			processor = TGSI_PROCESSOR_VERTEX;
629			break;
630		case 2:
631			processor = TGSI_PROCESSOR_GEOMETRY;
632			break;
633		default:
634			fail("Tessellation and compute shaders not yet supported");
635			return 0;
636		}
637
638		if(!sm4_link_cf_insns(program))
639			fail("Malformed control flow");
640		if(!sm4_find_labels(program))
641			fail("Failed to locate labels");
642
643		ureg = ureg_create(processor);
644
645		in_sub = false;
646
647		sm4_to_tgsi_insn_num.resize(program.insns.size());
648		for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num)
649		{
650			sm4_dcl& dcl = *program.dcls[insn_num];
651			int idx = -1;
652			if(dcl.op.get() && dcl.op->is_index_simple(0))
653				idx = dcl.op->indices[0].disp;
654			switch(dcl.opcode)
655			{
656			case SM4_OPCODE_DCL_GLOBAL_FLAGS:
657				break;
658			case SM4_OPCODE_DCL_TEMPS:
659				for(unsigned i = 0; i < dcl.num; ++i)
660					temps.push_back(ureg_DECL_temporary(ureg));
661				break;
662			case SM4_OPCODE_DCL_INPUT:
663				check(idx >= 0);
664				if(processor == TGSI_PROCESSOR_VERTEX)
665				{
666					if(inputs.size() <= (unsigned)idx)
667						inputs.resize(idx + 1);
668					inputs[idx] = ureg_DECL_vs_input(ureg, idx);
669				}
670				else if(processor == TGSI_PROCESSOR_GEOMETRY)
671				{
672					// TODO: is this correct?
673					unsigned gsidx = dcl.op->indices[1].disp;
674					if(inputs.size() <= (unsigned)gsidx)
675						inputs.resize(gsidx + 1);
676					inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx);
677				}
678				else
679					check(0);
680				break;
681			case SM4_OPCODE_DCL_INPUT_PS:
682				check(idx >= 0);
683				if(inputs.size() <= (unsigned)idx)
684					inputs.resize(idx + 1);
685				inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid);
686				break;
687			case SM4_OPCODE_DCL_OUTPUT:
688				check(idx >= 0);
689				if(outputs.size() <= (unsigned)idx)
690					outputs.resize(idx + 1);
691				if(processor == TGSI_PROCESSOR_FRAGMENT)
692					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx);
693				else
694					outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx);
695				break;
696			case SM4_OPCODE_DCL_INPUT_SIV:
697			case SM4_OPCODE_DCL_INPUT_SGV:
698			case SM4_OPCODE_DCL_INPUT_PS_SIV:
699			case SM4_OPCODE_DCL_INPUT_PS_SGV:
700				check(idx >= 0);
701				if(inputs.size() <= (unsigned)idx)
702					inputs.resize(idx + 1);
703				// TODO: is this correct?
704				inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0);
705				break;
706			case SM4_OPCODE_DCL_OUTPUT_SIV:
707			case SM4_OPCODE_DCL_OUTPUT_SGV:
708				check(idx >= 0);
709				if(outputs.size() <= (unsigned)idx)
710					outputs.resize(idx + 1);
711				check(sm4_to_pipe_sv[dcl.sv] >= 0);
712				outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0);
713				break;
714			case SM4_OPCODE_DCL_RESOURCE:
715				check(idx >= 0);
716				if(targets.size() <= (unsigned)idx)
717					targets.resize(idx + 1);
718				switch(dcl.dcl_resource.target)
719				{
720				case SM4_TARGET_TEXTURE1D:
721					targets[idx].first = TGSI_TEXTURE_1D;
722					targets[idx].second = TGSI_TEXTURE_SHADOW1D;
723					break;
724				case SM4_TARGET_TEXTURE1DARRAY:
725					targets[idx].first = TGSI_TEXTURE_1D_ARRAY;
726					targets[idx].second = TGSI_TEXTURE_SHADOW1D_ARRAY;
727					break;
728				case SM4_TARGET_TEXTURE2D:
729					targets[idx].first = TGSI_TEXTURE_2D;
730					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
731					break;
732				case SM4_TARGET_TEXTURE2DARRAY:
733					targets[idx].first = TGSI_TEXTURE_2D_ARRAY;
734					targets[idx].second = TGSI_TEXTURE_SHADOW2D_ARRAY;
735					break;
736				case SM4_TARGET_TEXTURE3D:
737					targets[idx].first = TGSI_TEXTURE_3D;
738					targets[idx].second = 0;
739					break;
740				case SM4_TARGET_TEXTURECUBE:
741					targets[idx].first = TGSI_TEXTURE_CUBE;
742					targets[idx].second = 0;
743					break;
744				default:
745					// HACK to make SimpleSample10 work
746					//check(0);
747					targets[idx].first = TGSI_TEXTURE_2D;
748					targets[idx].second = TGSI_TEXTURE_SHADOW2D;
749					break;
750				}
751				if(resources.size() <= (unsigned)idx)
752					resources.resize(idx + 1);
753				resources[idx] = ureg_DECL_sampler_view(
754                                   ureg, idx, targets[idx].first,
755                                   res_return_type(dcl.rrt.x),
756                                   res_return_type(dcl.rrt.y),
757                                   res_return_type(dcl.rrt.z),
758                                   res_return_type(dcl.rrt.w));
759				break;
760			case SM4_OPCODE_DCL_SAMPLER:
761				check(idx >= 0);
762				if(sampler_modes.size() <= (unsigned)idx)
763					sampler_modes.resize(idx + 1);
764				check(!dcl.dcl_sampler.mono);
765				sampler_modes[idx] = dcl.dcl_sampler.shadow;
766				if(samplers.size() <= (unsigned)idx)
767					samplers.resize(idx + 1);
768				samplers[idx] = ureg_DECL_sampler(ureg, idx);
769				break;
770			case SM4_OPCODE_DCL_CONSTANT_BUFFER:
771				check(dcl.op->num_indices == 2);
772				check(dcl.op->is_index_simple(0));
773				check(dcl.op->is_index_simple(1));
774				idx = dcl.op->indices[0].disp;
775				ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx);
776				break;
777			case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
778				ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]);
779				break;
780			case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
781				ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]);
782				break;
783			case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
784				ureg_property_gs_max_vertices(ureg, dcl.num);
785				break;
786			default:
787				check(0);
788			}
789		}
790
791		translate_insns(0, program.insns.size());
792		sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg));
793		if(in_sub)
794			ureg_ENDSUB(ureg);
795		else
796			ureg_END(ureg);
797
798		for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i)
799			ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]);
800
801		const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0);
802		ureg_destroy(ureg);
803		return (void*)tokens;
804	}
805
806	void* translate()
807	{
808		try
809		{
810			return do_translate();
811		}
812		catch(const char*)
813		{
814			return 0;
815		}
816	}
817};
818
819void* sm4_to_tgsi(struct sm4_program& program)
820{
821	sm4_to_tgsi_converter conv(program);
822	return conv.translate();
823}
824
825void* sm4_to_tgsi_linkage_only(struct sm4_program& prog)
826{
827	struct ureg_program* ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
828
829	uint64_t already = 0;
830	for(unsigned n = 0, i = 0; i < prog.num_params_out; ++i)
831	{
832		unsigned sn, si;
833
834		if(already & (1ULL << prog.params_out[i].Register))
835			continue;
836		already |= 1ULL << prog.params_out[i].Register;
837
838		switch(prog.params_out[i].SystemValueType)
839		{
840		case D3D_NAME_UNDEFINED:
841			sn = TGSI_SEMANTIC_GENERIC;
842			si = n++;
843			break;
844		case D3D_NAME_CULL_DISTANCE:
845		case D3D_NAME_CLIP_DISTANCE:
846			// FIXME
847			sn = 0;
848			si = prog.params_out[i].SemanticIndex;
849			assert(0);
850			break;
851		default:
852			continue;
853		}
854
855		ureg_DECL_output(ureg, sn, si);
856	}
857
858	const struct tgsi_token* tokens = ureg_get_tokens(ureg, 0);
859	ureg_destroy(ureg);
860	return (void*)tokens;
861}
862