1/*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28#include "radeon_program_pair.h"
29
30#include "radeon_compiler.h"
31#include "radeon_compiler_util.h"
32
33
34/**
35 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
36 * and reverse the order of arguments for CMP.
37 */
38static void final_rewrite(struct rc_sub_instruction *inst)
39{
40	struct rc_src_register tmp;
41
42	switch(inst->Opcode) {
43	case RC_OPCODE_ADD:
44		inst->SrcReg[2] = inst->SrcReg[1];
45		inst->SrcReg[1].File = RC_FILE_NONE;
46		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
47		inst->SrcReg[1].Negate = RC_MASK_NONE;
48		inst->Opcode = RC_OPCODE_MAD;
49		break;
50	case RC_OPCODE_CMP:
51		tmp = inst->SrcReg[2];
52		inst->SrcReg[2] = inst->SrcReg[0];
53		inst->SrcReg[0] = tmp;
54		break;
55	case RC_OPCODE_MOV:
56		/* AMD say we should use CMP.
57		 * However, when we transform
58		 *  KIL -r0;
59		 * into
60		 *  CMP tmp, -r0, -r0, 0;
61		 *  KIL tmp;
62		 * we get incorrect behaviour on R500 when r0 == 0.0.
63		 * It appears that the R500 KIL hardware treats -0.0 as less
64		 * than zero.
65		 */
66		inst->SrcReg[1].File = RC_FILE_NONE;
67		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
68		inst->SrcReg[2].File = RC_FILE_NONE;
69		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
70		inst->Opcode = RC_OPCODE_MAD;
71		break;
72	case RC_OPCODE_MUL:
73		inst->SrcReg[2].File = RC_FILE_NONE;
74		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
75		inst->Opcode = RC_OPCODE_MAD;
76		break;
77	default:
78		/* nothing to do */
79		break;
80	}
81}
82
83
84/**
85 * Classify an instruction according to which ALUs etc. it needs
86 */
87static void classify_instruction(struct rc_sub_instruction * inst,
88	int * needrgb, int * needalpha, int * istranscendent)
89{
90	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
91	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
92	*istranscendent = 0;
93
94	if (inst->WriteALUResult == RC_ALURESULT_X)
95		*needrgb = 1;
96	else if (inst->WriteALUResult == RC_ALURESULT_W)
97		*needalpha = 1;
98
99	switch(inst->Opcode) {
100	case RC_OPCODE_ADD:
101	case RC_OPCODE_CMP:
102	case RC_OPCODE_CND:
103	case RC_OPCODE_DDX:
104	case RC_OPCODE_DDY:
105	case RC_OPCODE_FRC:
106	case RC_OPCODE_MAD:
107	case RC_OPCODE_MAX:
108	case RC_OPCODE_MIN:
109	case RC_OPCODE_MOV:
110	case RC_OPCODE_MUL:
111		break;
112	case RC_OPCODE_COS:
113	case RC_OPCODE_EX2:
114	case RC_OPCODE_LG2:
115	case RC_OPCODE_RCP:
116	case RC_OPCODE_RSQ:
117	case RC_OPCODE_SIN:
118		*istranscendent = 1;
119		*needalpha = 1;
120		break;
121	case RC_OPCODE_DP4:
122		*needalpha = 1;
123		/* fall through */
124	case RC_OPCODE_DP3:
125		*needrgb = 1;
126		break;
127	default:
128		break;
129	}
130}
131
132static void src_uses(struct rc_src_register src, unsigned int * rgb,
133							unsigned int * alpha)
134{
135	int j;
136	for(j = 0; j < 4; ++j) {
137		unsigned int swz = GET_SWZ(src.Swizzle, j);
138		if (swz < 3)
139			*rgb = 1;
140		else if (swz < 4)
141			*alpha = 1;
142	}
143}
144
145/**
146 * Fill the given ALU instruction's opcodes and source operands into the given pair,
147 * if possible.
148 */
149static void set_pair_instruction(struct r300_fragment_program_compiler *c,
150	struct rc_pair_instruction * pair,
151	struct rc_sub_instruction * inst)
152{
153	int needrgb, needalpha, istranscendent;
154	const struct rc_opcode_info * opcode;
155	int i;
156
157	memset(pair, 0, sizeof(struct rc_pair_instruction));
158
159	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
160
161	if (needrgb) {
162		if (istranscendent)
163			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
164		else
165			pair->RGB.Opcode = inst->Opcode;
166		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
167			pair->RGB.Saturate = 1;
168	}
169	if (needalpha) {
170		pair->Alpha.Opcode = inst->Opcode;
171		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
172			pair->Alpha.Saturate = 1;
173	}
174
175	opcode = rc_get_opcode_info(inst->Opcode);
176
177	/* Presubtract handling:
178	 * We need to make sure that the values used by the presubtract
179	 * operation end up in src0 or src1. */
180	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
181		/* rc_pair_alloc_source() will fill in data for
182		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
183		int j;
184		for(j = 0; j < 3; j++) {
185			int src_regs;
186			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
187				continue;
188
189			src_regs = rc_presubtract_src_reg_count(
190							inst->PreSub.Opcode);
191			for(i = 0; i < src_regs; i++) {
192				unsigned int rgb = 0;
193				unsigned int alpha = 0;
194				src_uses(inst->SrcReg[j], &rgb, &alpha);
195				if(rgb) {
196					pair->RGB.Src[i].File =
197						inst->PreSub.SrcReg[i].File;
198					pair->RGB.Src[i].Index =
199						inst->PreSub.SrcReg[i].Index;
200					pair->RGB.Src[i].Used = 1;
201				}
202				if(alpha) {
203					pair->Alpha.Src[i].File =
204						inst->PreSub.SrcReg[i].File;
205					pair->Alpha.Src[i].Index =
206						inst->PreSub.SrcReg[i].Index;
207					pair->Alpha.Src[i].Used = 1;
208				}
209			}
210		}
211	}
212
213	for(i = 0; i < opcode->NumSrcRegs; ++i) {
214		int source;
215		if (needrgb && !istranscendent) {
216			unsigned int srcrgb = 0;
217			unsigned int srcalpha = 0;
218			unsigned int srcmask = 0;
219			int j;
220			/* We don't care about the alpha channel here.  We only
221			 * want the part of the swizzle that writes to rgb,
222			 * since we are creating an rgb instruction. */
223			for(j = 0; j < 3; ++j) {
224				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
225
226				if (swz < RC_SWIZZLE_W)
227					srcrgb = 1;
228				else if (swz == RC_SWIZZLE_W)
229					srcalpha = 1;
230
231				if (swz < RC_SWIZZLE_UNUSED)
232					srcmask |= 1 << j;
233			}
234			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
235							inst->SrcReg[i].File, inst->SrcReg[i].Index);
236			if (source < 0) {
237				rc_error(&c->Base, "Failed to translate "
238							"rgb instruction.\n");
239				return;
240			}
241			pair->RGB.Arg[i].Source = source;
242			pair->RGB.Arg[i].Swizzle =
243				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
244			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
245			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
246		}
247		if (needalpha) {
248			unsigned int srcrgb = 0;
249			unsigned int srcalpha = 0;
250			unsigned int swz;
251			if (istranscendent) {
252				swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
253			} else {
254				swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
255			}
256
257			if (swz < 3)
258				srcrgb = 1;
259			else if (swz < 4)
260				srcalpha = 1;
261			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
262							inst->SrcReg[i].File, inst->SrcReg[i].Index);
263			if (source < 0) {
264				rc_error(&c->Base, "Failed to translate "
265							"alpha instruction.\n");
266				return;
267			}
268			pair->Alpha.Arg[i].Source = source;
269			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
270			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
271
272			if (istranscendent) {
273				pair->Alpha.Arg[i].Negate =
274					!!(inst->SrcReg[i].Negate &
275							inst->DstReg.WriteMask);
276			} else {
277				pair->Alpha.Arg[i].Negate =
278					!!(inst->SrcReg[i].Negate & RC_MASK_W);
279			}
280		}
281	}
282
283	/* Destination handling */
284	if (inst->DstReg.File == RC_FILE_OUTPUT) {
285        if (inst->DstReg.Index == c->OutputDepth) {
286            pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
287        } else {
288            for (i = 0; i < 4; i++) {
289                if (inst->DstReg.Index == c->OutputColor[i]) {
290                    pair->RGB.Target = i;
291                    pair->Alpha.Target = i;
292                    pair->RGB.OutputWriteMask |=
293                        inst->DstReg.WriteMask & RC_MASK_XYZ;
294                    pair->Alpha.OutputWriteMask |=
295                        GET_BIT(inst->DstReg.WriteMask, 3);
296                    break;
297                }
298            }
299        }
300	} else {
301		if (needrgb) {
302			pair->RGB.DestIndex = inst->DstReg.Index;
303			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
304		}
305
306		if (needalpha) {
307			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
308			if (pair->Alpha.WriteMask) {
309				pair->Alpha.DestIndex = inst->DstReg.Index;
310			}
311		}
312	}
313
314	if (needrgb) {
315		pair->RGB.Omod = inst->Omod;
316	}
317	if (needalpha) {
318		pair->Alpha.Omod = inst->Omod;
319	}
320
321	if (inst->WriteALUResult) {
322		pair->WriteALUResult = inst->WriteALUResult;
323		pair->ALUResultCompare = inst->ALUResultCompare;
324	}
325}
326
327
328static void check_opcode_support(struct r300_fragment_program_compiler *c,
329				 struct rc_sub_instruction *inst)
330{
331	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
332
333	if (opcode->HasDstReg) {
334		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
335			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
336			return;
337		}
338	}
339
340	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
341		if (inst->SrcReg[i].RelAddr) {
342			rc_error(&c->Base, "Fragment program does not support relative addressing "
343				 " of source operands.\n");
344			return;
345		}
346	}
347}
348
349
350/**
351 * Translate all ALU instructions into corresponding pair instructions,
352 * performing no other changes.
353 */
354void rc_pair_translate(struct radeon_compiler *cc, void *user)
355{
356	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
357
358	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
359	    inst != &c->Base.Program.Instructions;
360	    inst = inst->Next) {
361		const struct rc_opcode_info * opcode;
362		struct rc_sub_instruction copy;
363
364		if (inst->Type != RC_INSTRUCTION_NORMAL)
365			continue;
366
367		opcode = rc_get_opcode_info(inst->U.I.Opcode);
368
369		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
370			continue;
371
372		copy = inst->U.I;
373
374		check_opcode_support(c, &copy);
375
376		final_rewrite(&copy);
377		inst->Type = RC_INSTRUCTION_PAIR;
378		set_pair_instruction(c, &inst->U.P, &copy);
379	}
380}
381