1/*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23#include "radeon_compiler.h"
24
25#include <stdarg.h>
26#include <stdio.h>
27#include <stdlib.h>
28
29#include "radeon_dataflow.h"
30#include "radeon_program.h"
31#include "radeon_program_pair.h"
32#include "radeon_compiler_util.h"
33
34
35void rc_init(struct radeon_compiler * c)
36{
37	memset(c, 0, sizeof(*c));
38
39	memory_pool_init(&c->Pool);
40	c->Program.Instructions.Prev = &c->Program.Instructions;
41	c->Program.Instructions.Next = &c->Program.Instructions;
42	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
43}
44
45void rc_destroy(struct radeon_compiler * c)
46{
47	rc_constants_destroy(&c->Program.Constants);
48	memory_pool_destroy(&c->Pool);
49	free(c->ErrorMsg);
50}
51
52void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
53{
54	va_list ap;
55
56	if (!(c->Debug & RC_DBG_LOG))
57		return;
58
59	va_start(ap, fmt);
60	vfprintf(stderr, fmt, ap);
61	va_end(ap);
62}
63
64void rc_error(struct radeon_compiler * c, const char * fmt, ...)
65{
66	va_list ap;
67
68	c->Error = 1;
69
70	if (!c->ErrorMsg) {
71		/* Only remember the first error */
72		char buf[1024];
73		int written;
74
75		va_start(ap, fmt);
76		written = vsnprintf(buf, sizeof(buf), fmt, ap);
77		va_end(ap);
78
79		if (written < sizeof(buf)) {
80			c->ErrorMsg = strdup(buf);
81		} else {
82			c->ErrorMsg = malloc(written + 1);
83
84			va_start(ap, fmt);
85			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
86			va_end(ap);
87		}
88	}
89
90	if (c->Debug & RC_DBG_LOG) {
91		fprintf(stderr, "r300compiler error: ");
92
93		va_start(ap, fmt);
94		vfprintf(stderr, fmt, ap);
95		va_end(ap);
96	}
97}
98
/**
 * Report a failed internal assertion through rc_error().
 *
 * \param file       source file name of the failed assertion
 * \param line       source line of the failed assertion
 * \param assertion  text of the failed expression
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line,
		      const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n",
		 file, line, assertion);

	return 1;
}
104
105/**
106 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
107 * based on which inputs and outputs are actually referenced
108 * in program instructions.
109 */
110void rc_calculate_inputs_outputs(struct radeon_compiler * c)
111{
112	struct rc_instruction *inst;
113
114	c->Program.InputsRead = 0;
115	c->Program.OutputsWritten = 0;
116
117	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
118	{
119		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
120		int i;
121
122		for (i = 0; i < opcode->NumSrcRegs; ++i) {
123			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
124				c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
125		}
126
127		if (opcode->HasDstReg) {
128			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
129				c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
130		}
131	}
132}
133
134/**
135 * Rewrite the program such that everything that source the given input
136 * register will source new_input instead.
137 */
138void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
139{
140	struct rc_instruction * inst;
141
142	c->Program.InputsRead &= ~(1 << input);
143
144	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
145		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
146		unsigned i;
147
148		for(i = 0; i < opcode->NumSrcRegs; ++i) {
149			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
150				inst->U.I.SrcReg[i].File = new_input.File;
151				inst->U.I.SrcReg[i].Index = new_input.Index;
152				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
153				if (!inst->U.I.SrcReg[i].Abs) {
154					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
155					inst->U.I.SrcReg[i].Abs = new_input.Abs;
156				}
157
158				c->Program.InputsRead |= 1 << new_input.Index;
159			}
160		}
161	}
162}
163
164
165/**
166 * Rewrite the program such that everything that writes into the given
167 * output register will instead write to new_output. The new_output
168 * writemask is honoured.
169 */
170void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
171{
172	struct rc_instruction * inst;
173
174	c->Program.OutputsWritten &= ~(1 << output);
175
176	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
177		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
178
179		if (opcode->HasDstReg) {
180			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
181				inst->U.I.DstReg.Index = new_output;
182				inst->U.I.DstReg.WriteMask &= writemask;
183
184				c->Program.OutputsWritten |= 1 << new_output;
185			}
186		}
187	}
188}
189
190
191/**
192 * Rewrite the program such that a given output is duplicated.
193 */
194void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
195{
196	unsigned tempreg = rc_find_free_temporary(c);
197	struct rc_instruction * inst;
198
199	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
200		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
201
202		if (opcode->HasDstReg) {
203			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
204				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
205				inst->U.I.DstReg.Index = tempreg;
206			}
207		}
208	}
209
210	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
211	inst->U.I.Opcode = RC_OPCODE_MOV;
212	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
213	inst->U.I.DstReg.Index = output;
214
215	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
216	inst->U.I.SrcReg[0].Index = tempreg;
217	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
218
219	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
220	inst->U.I.Opcode = RC_OPCODE_MOV;
221	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
222	inst->U.I.DstReg.Index = dup_output;
223
224	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
225	inst->U.I.SrcReg[0].Index = tempreg;
226	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
227
228	c->Program.OutputsWritten |= 1 << dup_output;
229}
230
231
232/**
233 * Introduce standard code fragment to deal with fragment.position.
234 */
235void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
236                                int full_vtransform)
237{
238	unsigned tempregi = rc_find_free_temporary(c);
239	struct rc_instruction * inst_rcp;
240	struct rc_instruction * inst_mul;
241	struct rc_instruction * inst_mad;
242	struct rc_instruction * inst;
243
244	c->Program.InputsRead &= ~(1 << wpos);
245	c->Program.InputsRead |= 1 << new_input;
246
247	/* perspective divide */
248	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
249	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
250
251	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
252	inst_rcp->U.I.DstReg.Index = tempregi;
253	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
254
255	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
256	inst_rcp->U.I.SrcReg[0].Index = new_input;
257	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
258
259	inst_mul = rc_insert_new_instruction(c, inst_rcp);
260	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
261
262	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
263	inst_mul->U.I.DstReg.Index = tempregi;
264	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
265
266	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
267	inst_mul->U.I.SrcReg[0].Index = new_input;
268
269	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
270	inst_mul->U.I.SrcReg[1].Index = tempregi;
271	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
272
273	/* viewport transformation */
274	inst_mad = rc_insert_new_instruction(c, inst_mul);
275	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
276
277	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
278	inst_mad->U.I.DstReg.Index = tempregi;
279	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
280
281	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
282	inst_mad->U.I.SrcReg[0].Index = tempregi;
283	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
284
285	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
286	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
287
288	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
289	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
290
291	if (full_vtransform) {
292		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
293		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
294	} else {
295		inst_mad->U.I.SrcReg[1].Index =
296		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
297	}
298
299	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
300		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
301		unsigned i;
302
303		for(i = 0; i < opcode->NumSrcRegs; i++) {
304			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
305			    inst->U.I.SrcReg[i].Index == wpos) {
306				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
307				inst->U.I.SrcReg[i].Index = tempregi;
308			}
309		}
310	}
311}
312
313
314/**
315 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
316 * Gallium and OpenGL define it the other way around.
317 *
318 * So let's just negate FACE at the beginning of the shader and rewrite the rest
319 * of the shader to read from the newly allocated temporary.
320 */
321void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
322{
323	unsigned tempregi = rc_find_free_temporary(c);
324	struct rc_instruction *inst_add;
325	struct rc_instruction *inst;
326
327	/* perspective divide */
328	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
329	inst_add->U.I.Opcode = RC_OPCODE_ADD;
330
331	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
332	inst_add->U.I.DstReg.Index = tempregi;
333	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
334
335	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
336	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
337
338	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
339	inst_add->U.I.SrcReg[1].Index = face;
340	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
341	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
342
343	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
344		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
345		unsigned i;
346
347		for(i = 0; i < opcode->NumSrcRegs; i++) {
348			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
349			    inst->U.I.SrcReg[i].Index == face) {
350				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
351				inst->U.I.SrcReg[i].Index = tempregi;
352			}
353		}
354	}
355}
356
357static void reg_count_callback(void * userdata, struct rc_instruction * inst,
358		rc_register_file file, unsigned int index, unsigned int mask)
359{
360	struct rc_program_stats *s = userdata;
361	if (file == RC_FILE_TEMPORARY)
362		(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
363	if (file == RC_FILE_INLINE)
364		s->num_inline_literals++;
365}
366
367void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
368{
369	struct rc_instruction * tmp;
370	memset(s, 0, sizeof(*s));
371
372	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
373							tmp = tmp->Next){
374		const struct rc_opcode_info * info;
375		rc_for_all_reads_mask(tmp, reg_count_callback, s);
376		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
377			info = rc_get_opcode_info(tmp->U.I.Opcode);
378			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
379				continue;
380			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
381				s->num_presub_ops++;
382		} else {
383			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
384				s->num_presub_ops++;
385			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
386				s->num_presub_ops++;
387			/* Assuming alpha will never be a flow control or
388			 * a tex instruction. */
389			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
390				s->num_alpha_insts++;
391			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
392				s->num_rgb_insts++;
393			if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
394				tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
395				s->num_omod_ops++;
396			}
397			if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
398				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
399				s->num_omod_ops++;
400			}
401			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
402		}
403		if (info->IsFlowControl)
404			s->num_fc_insts++;
405		if (info->HasTexture)
406			s->num_tex_insts++;
407		s->num_insts++;
408	}
409	/* Increment here because the reg_count_callback store the max
410	 * temporary reg index in s->nun_temp_regs. */
411	s->num_temp_regs++;
412}
413
414static void print_stats(struct radeon_compiler * c)
415{
416	struct rc_program_stats s;
417
418	if (c->initial_num_insts <= 5)
419		return;
420
421	rc_get_stats(c, &s);
422
423	switch (c->type) {
424	case RC_VERTEX_PROGRAM:
425		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
426			       "~%4u Instructions\n"
427			       "~%4u Flow Control Instructions\n"
428			       "~%4u Temporary Registers\n"
429			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
430			       s.num_insts, s.num_fc_insts, s.num_temp_regs);
431		break;
432
433	case RC_FRAGMENT_PROGRAM:
434		fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
435			       "~%4u Instructions\n"
436			       "~%4u Vector Instructions (RGB)\n"
437			       "~%4u Scalar Instructions (Alpha)\n"
438			       "~%4u Flow Control Instructions\n"
439			       "~%4u Texture Instructions\n"
440			       "~%4u Presub Operations\n"
441			       "~%4u OMOD Operations\n"
442			       "~%4u Temporary Registers\n"
443			       "~%4u Inline Literals\n"
444			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
445			       s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
446			       s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
447			       s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
448		break;
449	default:
450		assert(0);
451	}
452}
453
/* Human-readable program type names for the debug log output; the table is
 * indexed with c->type, so the entry order has to follow the program type
 * enumeration (NOTE(review): presumably vertex = 0, fragment = 1 - confirm
 * against the enum declaration). */
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
	"Vertex Program",
	"Fragment Program"
};
458
459void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
460{
461	for (unsigned i = 0; list[i].name; i++) {
462		if (list[i].predicate) {
463			list[i].run(c, list[i].user);
464
465			if (c->Error)
466				return;
467
468			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
469				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
470				rc_print_program(&c->Program);
471			}
472		}
473	}
474}
475
476/* Executes a list of compiler passes given in the parameter 'list'. */
477void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
478{
479	struct rc_program_stats s;
480
481	rc_get_stats(c, &s);
482	c->initial_num_insts = s.num_insts;
483
484	if (c->Debug & RC_DBG_LOG) {
485		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
486		rc_print_program(&c->Program);
487	}
488
489	rc_run_compiler_passes(c, list);
490
491	if (c->Debug & RC_DBG_STATS)
492		print_stats(c);
493}
494
495void rc_validate_final_shader(struct radeon_compiler *c, void *user)
496{
497	/* Check the number of constants. */
498	if (c->Program.Constants.Count > c->max_constants) {
499		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
500			 c->max_constants, c->Program.Constants.Count);
501	}
502}
503