1/*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23#include "radeon_compiler.h"
24
25#include <stdarg.h>
26#include <stdio.h>
27#include <stdlib.h>
28
29#include "radeon_dataflow.h"
30#include "radeon_program.h"
31#include "radeon_program_pair.h"
32#include "radeon_regalloc.h"
33#include "radeon_compiler_util.h"
34
35
36void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
37{
38	memset(c, 0, sizeof(*c));
39
40	memory_pool_init(&c->Pool);
41	c->Program.Instructions.Prev = &c->Program.Instructions;
42	c->Program.Instructions.Next = &c->Program.Instructions;
43	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
44	c->regalloc_state = rs;
45}
46
47void rc_destroy(struct radeon_compiler * c)
48{
49	rc_constants_destroy(&c->Program.Constants);
50	memory_pool_destroy(&c->Pool);
51	free(c->ErrorMsg);
52}
53
54void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
55{
56	va_list ap;
57
58	if (!(c->Debug & RC_DBG_LOG))
59		return;
60
61	va_start(ap, fmt);
62	vfprintf(stderr, fmt, ap);
63	va_end(ap);
64}
65
66void rc_error(struct radeon_compiler * c, const char * fmt, ...)
67{
68	va_list ap;
69
70	c->Error = 1;
71
72	if (!c->ErrorMsg) {
73		/* Only remember the first error */
74		char buf[1024];
75		int written;
76
77		va_start(ap, fmt);
78		written = vsnprintf(buf, sizeof(buf), fmt, ap);
79		va_end(ap);
80
81		if (written < sizeof(buf)) {
82			c->ErrorMsg = strdup(buf);
83		} else {
84			c->ErrorMsg = malloc(written + 1);
85
86			va_start(ap, fmt);
87			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
88			va_end(ap);
89		}
90	}
91
92	if (c->Debug & RC_DBG_LOG) {
93		fprintf(stderr, "r300compiler error: ");
94
95		va_start(ap, fmt);
96		vfprintf(stderr, fmt, ap);
97		va_end(ap);
98	}
99}
100
/**
 * Report a failed internal assertion as a compiler error.
 *
 * The source location and the text of the assertion are passed to
 * rc_error() as an "ICE" message.
 *
 * Always returns 1.
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n",
		 file, line, assertion);

	return 1;
}
106
107/**
108 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
109 * based on which inputs and outputs are actually referenced
110 * in program instructions.
111 */
112void rc_calculate_inputs_outputs(struct radeon_compiler * c)
113{
114	struct rc_instruction *inst;
115
116	c->Program.InputsRead = 0;
117	c->Program.OutputsWritten = 0;
118
119	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
120	{
121		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
122		int i;
123
124		for (i = 0; i < opcode->NumSrcRegs; ++i) {
125			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
126				c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
127		}
128
129		if (opcode->HasDstReg) {
130			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
131				c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
132		}
133	}
134}
135
136/**
137 * Rewrite the program such that everything that source the given input
138 * register will source new_input instead.
139 */
140void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
141{
142	struct rc_instruction * inst;
143
144	c->Program.InputsRead &= ~(1 << input);
145
146	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
147		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
148		unsigned i;
149
150		for(i = 0; i < opcode->NumSrcRegs; ++i) {
151			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
152				inst->U.I.SrcReg[i].File = new_input.File;
153				inst->U.I.SrcReg[i].Index = new_input.Index;
154				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
155				if (!inst->U.I.SrcReg[i].Abs) {
156					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
157					inst->U.I.SrcReg[i].Abs = new_input.Abs;
158				}
159
160				c->Program.InputsRead |= 1 << new_input.Index;
161			}
162		}
163	}
164}
165
166
167/**
168 * Rewrite the program such that everything that writes into the given
169 * output register will instead write to new_output. The new_output
170 * writemask is honoured.
171 */
172void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
173{
174	struct rc_instruction * inst;
175
176	c->Program.OutputsWritten &= ~(1 << output);
177
178	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
179		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
180
181		if (opcode->HasDstReg) {
182			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
183				inst->U.I.DstReg.Index = new_output;
184				inst->U.I.DstReg.WriteMask &= writemask;
185
186				c->Program.OutputsWritten |= 1 << new_output;
187			}
188		}
189	}
190}
191
192
193/**
194 * Rewrite the program such that a given output is duplicated.
195 */
196void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
197{
198	unsigned tempreg = rc_find_free_temporary(c);
199	struct rc_instruction * inst;
200
201	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
202		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
203
204		if (opcode->HasDstReg) {
205			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
206				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
207				inst->U.I.DstReg.Index = tempreg;
208			}
209		}
210	}
211
212	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
213	inst->U.I.Opcode = RC_OPCODE_MOV;
214	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
215	inst->U.I.DstReg.Index = output;
216
217	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
218	inst->U.I.SrcReg[0].Index = tempreg;
219	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
220
221	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
222	inst->U.I.Opcode = RC_OPCODE_MOV;
223	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
224	inst->U.I.DstReg.Index = dup_output;
225
226	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
227	inst->U.I.SrcReg[0].Index = tempreg;
228	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
229
230	c->Program.OutputsWritten |= 1 << dup_output;
231}
232
233
234/**
235 * Introduce standard code fragment to deal with fragment.position.
236 */
237void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
238                                int full_vtransform)
239{
240	unsigned tempregi = rc_find_free_temporary(c);
241	struct rc_instruction * inst_rcp;
242	struct rc_instruction * inst_mul;
243	struct rc_instruction * inst_mad;
244	struct rc_instruction * inst;
245
246	c->Program.InputsRead &= ~(1 << wpos);
247	c->Program.InputsRead |= 1 << new_input;
248
249	/* perspective divide */
250	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
251	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
252
253	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
254	inst_rcp->U.I.DstReg.Index = tempregi;
255	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
256
257	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
258	inst_rcp->U.I.SrcReg[0].Index = new_input;
259	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
260
261	inst_mul = rc_insert_new_instruction(c, inst_rcp);
262	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
263
264	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
265	inst_mul->U.I.DstReg.Index = tempregi;
266	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
267
268	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
269	inst_mul->U.I.SrcReg[0].Index = new_input;
270
271	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
272	inst_mul->U.I.SrcReg[1].Index = tempregi;
273	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
274
275	/* viewport transformation */
276	inst_mad = rc_insert_new_instruction(c, inst_mul);
277	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
278
279	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
280	inst_mad->U.I.DstReg.Index = tempregi;
281	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
282
283	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
284	inst_mad->U.I.SrcReg[0].Index = tempregi;
285	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
286
287	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
288	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
289
290	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
291	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
292
293	if (full_vtransform) {
294		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
295		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
296	} else {
297		inst_mad->U.I.SrcReg[1].Index =
298		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
299	}
300
301	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
302		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
303		unsigned i;
304
305		for(i = 0; i < opcode->NumSrcRegs; i++) {
306			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
307			    inst->U.I.SrcReg[i].Index == wpos) {
308				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
309				inst->U.I.SrcReg[i].Index = tempregi;
310			}
311		}
312	}
313}
314
315
316/**
317 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
318 * Gallium and OpenGL define it the other way around.
319 *
320 * So let's just negate FACE at the beginning of the shader and rewrite the rest
321 * of the shader to read from the newly allocated temporary.
322 */
323void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
324{
325	unsigned tempregi = rc_find_free_temporary(c);
326	struct rc_instruction *inst_add;
327	struct rc_instruction *inst;
328
329	/* perspective divide */
330	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
331	inst_add->U.I.Opcode = RC_OPCODE_ADD;
332
333	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
334	inst_add->U.I.DstReg.Index = tempregi;
335	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
336
337	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
338	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
339
340	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
341	inst_add->U.I.SrcReg[1].Index = face;
342	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
343	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
344
345	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
346		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
347		unsigned i;
348
349		for(i = 0; i < opcode->NumSrcRegs; i++) {
350			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
351			    inst->U.I.SrcReg[i].Index == face) {
352				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
353				inst->U.I.SrcReg[i].Index = tempregi;
354			}
355		}
356	}
357}
358
359static void reg_count_callback(void * userdata, struct rc_instruction * inst,
360		rc_register_file file, unsigned int index, unsigned int mask)
361{
362	struct rc_program_stats *s = userdata;
363	if (file == RC_FILE_TEMPORARY)
364		(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
365	if (file == RC_FILE_INLINE)
366		s->num_inline_literals++;
367}
368
369void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
370{
371	struct rc_instruction * tmp;
372	memset(s, 0, sizeof(*s));
373
374	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
375							tmp = tmp->Next){
376		const struct rc_opcode_info * info;
377		rc_for_all_reads_mask(tmp, reg_count_callback, s);
378		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
379			info = rc_get_opcode_info(tmp->U.I.Opcode);
380			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
381				continue;
382			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
383				s->num_presub_ops++;
384		} else {
385			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
386				s->num_presub_ops++;
387			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
388				s->num_presub_ops++;
389			/* Assuming alpha will never be a flow control or
390			 * a tex instruction. */
391			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
392				s->num_alpha_insts++;
393			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
394				s->num_rgb_insts++;
395			if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
396				tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
397				s->num_omod_ops++;
398			}
399			if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
400				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
401				s->num_omod_ops++;
402			}
403			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
404		}
405		if (info->IsFlowControl)
406			s->num_fc_insts++;
407		if (info->HasTexture)
408			s->num_tex_insts++;
409		s->num_insts++;
410	}
411	/* Increment here because the reg_count_callback store the max
412	 * temporary reg index in s->nun_temp_regs. */
413	s->num_temp_regs++;
414}
415
416static void print_stats(struct radeon_compiler * c)
417{
418	struct rc_program_stats s;
419
420	if (c->initial_num_insts <= 5)
421		return;
422
423	rc_get_stats(c, &s);
424
425	switch (c->type) {
426	case RC_VERTEX_PROGRAM:
427		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
428			       "~%4u Instructions\n"
429			       "~%4u Flow Control Instructions\n"
430			       "~%4u Temporary Registers\n"
431			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
432			       s.num_insts, s.num_fc_insts, s.num_temp_regs);
433		break;
434
435	case RC_FRAGMENT_PROGRAM:
436		fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
437			       "~%4u Instructions\n"
438			       "~%4u Vector Instructions (RGB)\n"
439			       "~%4u Scalar Instructions (Alpha)\n"
440			       "~%4u Flow Control Instructions\n"
441			       "~%4u Texture Instructions\n"
442			       "~%4u Presub Operations\n"
443			       "~%4u OMOD Operations\n"
444			       "~%4u Temporary Registers\n"
445			       "~%4u Inline Literals\n"
446			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
447			       s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
448			       s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
449			       s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
450		break;
451	default:
452		assert(0);
453	}
454}
455
456static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
457	"Vertex Program",
458	"Fragment Program"
459};
460
461void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
462{
463	for (unsigned i = 0; list[i].name; i++) {
464		if (list[i].predicate) {
465			list[i].run(c, list[i].user);
466
467			if (c->Error)
468				return;
469
470			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
471				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
472				rc_print_program(&c->Program);
473			}
474		}
475	}
476}
477
478/* Executes a list of compiler passes given in the parameter 'list'. */
479void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
480{
481	struct rc_program_stats s;
482
483	rc_get_stats(c, &s);
484	c->initial_num_insts = s.num_insts;
485
486	if (c->Debug & RC_DBG_LOG) {
487		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
488		rc_print_program(&c->Program);
489	}
490
491	rc_run_compiler_passes(c, list);
492
493	if (c->Debug & RC_DBG_STATS)
494		print_stats(c);
495}
496
497void rc_validate_final_shader(struct radeon_compiler *c, void *user)
498{
499	/* Check the number of constants. */
500	if (c->Program.Constants.Count > c->max_constants) {
501		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
502			 c->max_constants, c->Program.Constants.Count);
503	}
504}
505