radeon_optimize.c revision e945fb04d04c33da5e77d22d739c5740a522a61e
1/*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29#include "radeon_dataflow.h"
30
31#include "radeon_compiler.h"
32#include "radeon_compiler_util.h"
33#include "radeon_list.h"
34#include "radeon_swizzle.h"
35#include "radeon_variable.h"
36
37struct src_clobbered_reads_cb_data {
38	rc_register_file File;
39	unsigned int Index;
40	unsigned int Mask;
41	struct rc_reader_data * ReaderData;
42};
43
/**
 * Callback invoked by presub_helper() for each reader that should be switched
 * over to a presubtract source.
 *
 * Arguments, in order: the ADD instruction being replaced, the instruction
 * that reads its result, and the index of the reader's source register that
 * refers to that result.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *inst_add,
				     struct rc_instruction *inst_reader,
				     unsigned int src_index);
47
48static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49{
50	struct rc_src_register combine;
51	combine.File = inner.File;
52	combine.Index = inner.Index;
53	combine.RelAddr = inner.RelAddr;
54	if (outer.Abs) {
55		combine.Abs = 1;
56		combine.Negate = outer.Negate;
57	} else {
58		combine.Abs = inner.Abs;
59		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60		combine.Negate ^= outer.Negate;
61	}
62	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63	return combine;
64}
65
66static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67						struct rc_src_register * src)
68{
69	rc_register_file file = src->File;
70	struct rc_reader_data * reader_data = data;
71
72	if(!rc_inst_can_use_presub(inst,
73				reader_data->Writer->U.I.PreSub.Opcode,
74				rc_swizzle_to_writemask(src->Swizzle),
75				src,
76				&reader_data->Writer->U.I.PreSub.SrcReg[0],
77				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78		reader_data->Abort = 1;
79		return;
80	}
81
82	/* XXX This could probably be handled better. */
83	if (file == RC_FILE_ADDRESS) {
84		reader_data->Abort = 1;
85		return;
86	}
87
88	/* These instructions cannot read from the constants file.
89	 * see radeonTransformTEX()
90	 */
91	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93				(inst->U.I.Opcode == RC_OPCODE_TEX ||
94				inst->U.I.Opcode == RC_OPCODE_TXB ||
95				inst->U.I.Opcode == RC_OPCODE_TXP ||
96				inst->U.I.Opcode == RC_OPCODE_TXD ||
97				inst->U.I.Opcode == RC_OPCODE_TXL ||
98				inst->U.I.Opcode == RC_OPCODE_KIL)){
99		reader_data->Abort = 1;
100		return;
101	}
102}
103
104static void src_clobbered_reads_cb(
105	void * data,
106	struct rc_instruction * inst,
107	struct rc_src_register * src)
108{
109	struct src_clobbered_reads_cb_data * sc_data = data;
110
111	if (src->File == sc_data->File
112	    && src->Index == sc_data->Index
113	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114
115		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116	}
117
118	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120	}
121}
122
123static void is_src_clobbered_scan_write(
124	void * data,
125	struct rc_instruction * inst,
126	rc_register_file file,
127	unsigned int index,
128	unsigned int mask)
129{
130	struct src_clobbered_reads_cb_data sc_data;
131	struct rc_reader_data * reader_data = data;
132	sc_data.File = file;
133	sc_data.Index = index;
134	sc_data.Mask = mask;
135	sc_data.ReaderData = reader_data;
136	rc_for_all_reads_src(reader_data->Writer,
137					src_clobbered_reads_cb, &sc_data);
138}
139
140static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141{
142	struct rc_reader_data reader_data;
143	unsigned int i;
144
145	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146	    inst_mov->U.I.WriteALUResult ||
147	    inst_mov->U.I.SaturateMode)
148		return;
149
150	/* Get a list of all the readers of this MOV instruction. */
151	reader_data.ExitOnAbort = 1;
152	rc_get_readers(c, inst_mov, &reader_data,
153		       copy_propagate_scan_read, NULL,
154		       is_src_clobbered_scan_write);
155
156	if (reader_data.Abort || reader_data.ReaderCount == 0)
157		return;
158
159	/* Propagate the MOV instruction. */
160	for (i = 0; i < reader_data.ReaderCount; i++) {
161		struct rc_instruction * inst = reader_data.Readers[i].Inst;
162		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
163
164		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
165			inst->U.I.PreSub = inst_mov->U.I.PreSub;
166	}
167
168	/* Finally, remove the original MOV instruction */
169	rc_remove_instruction(inst_mov);
170}
171
172/**
173 * Check if a source register is actually always the same
174 * swizzle constant.
175 */
176static int is_src_uniform_constant(struct rc_src_register src,
177		rc_swizzle * pswz, unsigned int * pnegate)
178{
179	int have_used = 0;
180
181	if (src.File != RC_FILE_NONE) {
182		*pswz = 0;
183		return 0;
184	}
185
186	for(unsigned int chan = 0; chan < 4; ++chan) {
187		unsigned int swz = GET_SWZ(src.Swizzle, chan);
188		if (swz < 4) {
189			*pswz = 0;
190			return 0;
191		}
192		if (swz == RC_SWIZZLE_UNUSED)
193			continue;
194
195		if (!have_used) {
196			*pswz = swz;
197			*pnegate = GET_BIT(src.Negate, chan);
198			have_used = 1;
199		} else {
200			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
201				*pswz = 0;
202				return 0;
203			}
204		}
205	}
206
207	return 1;
208}
209
210static void constant_folding_mad(struct rc_instruction * inst)
211{
212	rc_swizzle swz = 0;
213	unsigned int negate= 0;
214
215	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
216		if (swz == RC_SWIZZLE_ZERO) {
217			inst->U.I.Opcode = RC_OPCODE_MUL;
218			return;
219		}
220	}
221
222	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
223		if (swz == RC_SWIZZLE_ONE) {
224			inst->U.I.Opcode = RC_OPCODE_ADD;
225			if (negate)
226				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
227			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
228			return;
229		} else if (swz == RC_SWIZZLE_ZERO) {
230			inst->U.I.Opcode = RC_OPCODE_MOV;
231			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
232			return;
233		}
234	}
235
236	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
237		if (swz == RC_SWIZZLE_ONE) {
238			inst->U.I.Opcode = RC_OPCODE_ADD;
239			if (negate)
240				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
241			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
242			return;
243		} else if (swz == RC_SWIZZLE_ZERO) {
244			inst->U.I.Opcode = RC_OPCODE_MOV;
245			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
246			return;
247		}
248	}
249}
250
251static void constant_folding_mul(struct rc_instruction * inst)
252{
253	rc_swizzle swz = 0;
254	unsigned int negate = 0;
255
256	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
257		if (swz == RC_SWIZZLE_ONE) {
258			inst->U.I.Opcode = RC_OPCODE_MOV;
259			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
260			if (negate)
261				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
262			return;
263		} else if (swz == RC_SWIZZLE_ZERO) {
264			inst->U.I.Opcode = RC_OPCODE_MOV;
265			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
266			return;
267		}
268	}
269
270	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
271		if (swz == RC_SWIZZLE_ONE) {
272			inst->U.I.Opcode = RC_OPCODE_MOV;
273			if (negate)
274				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
275			return;
276		} else if (swz == RC_SWIZZLE_ZERO) {
277			inst->U.I.Opcode = RC_OPCODE_MOV;
278			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
279			return;
280		}
281	}
282}
283
284static void constant_folding_add(struct rc_instruction * inst)
285{
286	rc_swizzle swz = 0;
287	unsigned int negate = 0;
288
289	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
290		if (swz == RC_SWIZZLE_ZERO) {
291			inst->U.I.Opcode = RC_OPCODE_MOV;
292			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
293			return;
294		}
295	}
296
297	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
298		if (swz == RC_SWIZZLE_ZERO) {
299			inst->U.I.Opcode = RC_OPCODE_MOV;
300			return;
301		}
302	}
303}
304
305/**
306 * Replace 0.0, 1.0 and 0.5 immediate constants by their
307 * respective swizzles. Simplify instructions like ADD dst, src, 0;
308 */
309static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
310{
311	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
312	unsigned int i;
313
314	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
315	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
316		struct rc_constant * constant;
317		struct rc_src_register newsrc;
318		int have_real_reference;
319		unsigned int chan;
320
321		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
322		for (chan = 0; chan < 4; ++chan)
323			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
324				break;
325		if (chan == 4) {
326			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
327			continue;
328		}
329
330		/* Convert immediates to swizzles. */
331		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
332		    inst->U.I.SrcReg[src].RelAddr ||
333		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
334			continue;
335
336		constant =
337			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
338
339		if (constant->Type != RC_CONSTANT_IMMEDIATE)
340			continue;
341
342		newsrc = inst->U.I.SrcReg[src];
343		have_real_reference = 0;
344		for (chan = 0; chan < 4; ++chan) {
345			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
346			unsigned int newswz;
347			float imm;
348			float baseimm;
349
350			if (swz >= 4)
351				continue;
352
353			imm = constant->u.Immediate[swz];
354			baseimm = imm;
355			if (imm < 0.0)
356				baseimm = -baseimm;
357
358			if (baseimm == 0.0) {
359				newswz = RC_SWIZZLE_ZERO;
360			} else if (baseimm == 1.0) {
361				newswz = RC_SWIZZLE_ONE;
362			} else if (baseimm == 0.5 && c->has_half_swizzles) {
363				newswz = RC_SWIZZLE_HALF;
364			} else {
365				have_real_reference = 1;
366				continue;
367			}
368
369			SET_SWZ(newsrc.Swizzle, chan, newswz);
370			if (imm < 0.0 && !newsrc.Abs)
371				newsrc.Negate ^= 1 << chan;
372		}
373
374		if (!have_real_reference) {
375			newsrc.File = RC_FILE_NONE;
376			newsrc.Index = 0;
377		}
378
379		/* don't make the swizzle worse */
380		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
381		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
382			continue;
383
384		inst->U.I.SrcReg[src] = newsrc;
385	}
386
387	/* Simplify instructions based on constants */
388	if (inst->U.I.Opcode == RC_OPCODE_MAD)
389		constant_folding_mad(inst);
390
391	/* note: MAD can simplify to MUL or ADD */
392	if (inst->U.I.Opcode == RC_OPCODE_MUL)
393		constant_folding_mul(inst);
394	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
395		constant_folding_add(inst);
396
397	/* In case this instruction has been converted, make sure all of the
398	 * registers that are no longer used are empty. */
399	opcode = rc_get_opcode_info(inst->U.I.Opcode);
400	for(i = opcode->NumSrcRegs; i < 3; i++) {
401		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
402	}
403}
404
405/**
406 * If src and dst use the same register, this function returns a writemask that
407 * indicates wich components are read by src.  Otherwise zero is returned.
408 */
409static unsigned int src_reads_dst_mask(struct rc_src_register src,
410						struct rc_dst_register dst)
411{
412	if (dst.File != src.File || dst.Index != src.Index) {
413		return 0;
414	}
415	return rc_swizzle_to_writemask(src.Swizzle);
416}
417
418/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
419 * in any of its channels.  Return 0 otherwise. */
420static int src_has_const_swz(struct rc_src_register src) {
421	int chan;
422	for(chan = 0; chan < 4; chan++) {
423		unsigned int swz = GET_SWZ(src.Swizzle, chan);
424		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
425						|| swz == RC_SWIZZLE_ONE) {
426			return 1;
427		}
428	}
429	return 0;
430}
431
432static void presub_scan_read(
433	void * data,
434	struct rc_instruction * inst,
435	struct rc_src_register * src)
436{
437	struct rc_reader_data * reader_data = data;
438	rc_presubtract_op * presub_opcode = reader_data->CbData;
439
440	if (!rc_inst_can_use_presub(inst, *presub_opcode,
441			reader_data->Writer->U.I.DstReg.WriteMask,
442			src,
443			&reader_data->Writer->U.I.SrcReg[0],
444			&reader_data->Writer->U.I.SrcReg[1])) {
445		reader_data->Abort = 1;
446		return;
447	}
448}
449
450static int presub_helper(
451	struct radeon_compiler * c,
452	struct rc_instruction * inst_add,
453	rc_presubtract_op presub_opcode,
454	rc_presub_replace_fn presub_replace)
455{
456	struct rc_reader_data reader_data;
457	unsigned int i;
458	rc_presubtract_op cb_op = presub_opcode;
459
460	reader_data.CbData = &cb_op;
461	reader_data.ExitOnAbort = 1;
462	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
463						is_src_clobbered_scan_write);
464
465	if (reader_data.Abort || reader_data.ReaderCount == 0)
466		return 0;
467
468	for(i = 0; i < reader_data.ReaderCount; i++) {
469		unsigned int src_index;
470		struct rc_reader reader = reader_data.Readers[i];
471		const struct rc_opcode_info * info =
472				rc_get_opcode_info(reader.Inst->U.I.Opcode);
473
474		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
475			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
476				presub_replace(inst_add, reader.Inst, src_index);
477		}
478	}
479	return 1;
480}
481
482/* This function assumes that inst_add->U.I.SrcReg[0] and
483 * inst_add->U.I.SrcReg[1] aren't both negative. */
484static void presub_replace_add(
485	struct rc_instruction * inst_add,
486	struct rc_instruction * inst_reader,
487	unsigned int src_index)
488{
489	rc_presubtract_op presub_opcode;
490	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
491		presub_opcode = RC_PRESUB_SUB;
492	else
493		presub_opcode = RC_PRESUB_ADD;
494
495	if (inst_add->U.I.SrcReg[1].Negate) {
496		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
497		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
498	} else {
499		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
500		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
501	}
502	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
503	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
504	inst_reader->U.I.PreSub.Opcode = presub_opcode;
505	inst_reader->U.I.SrcReg[src_index] =
506			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
507					inst_reader->U.I.PreSub.SrcReg[0]);
508	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
509	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
510}
511
512static int is_presub_candidate(
513	struct radeon_compiler * c,
514	struct rc_instruction * inst)
515{
516	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
517	unsigned int i;
518	unsigned int is_constant[2] = {0, 0};
519
520	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
521
522	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
523			|| inst->U.I.SaturateMode
524			|| inst->U.I.WriteALUResult
525			|| inst->U.I.Omod) {
526		return 0;
527	}
528
529	/* If both sources use a constant swizzle, then we can't convert it to
530	 * a presubtract operation.  In fact for the ADD and SUB presubtract
531	 * operations neither source can contain a constant swizzle.  This
532	 * specific case is checked in peephole_add_presub_add() when
533	 * we make sure the swizzles for both sources are equal, so we
534	 * don't need to worry about it here. */
535	for (i = 0; i < 2; i++) {
536		int chan;
537		for (chan = 0; chan < 4; chan++) {
538			rc_swizzle swz =
539				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
540			if (swz == RC_SWIZZLE_ONE
541					|| swz == RC_SWIZZLE_ZERO
542					|| swz == RC_SWIZZLE_HALF) {
543				is_constant[i] = 1;
544			}
545		}
546	}
547	if (is_constant[0] && is_constant[1])
548		return 0;
549
550	for(i = 0; i < info->NumSrcRegs; i++) {
551		struct rc_src_register src = inst->U.I.SrcReg[i];
552		if (src_reads_dst_mask(src, inst->U.I.DstReg))
553			return 0;
554
555		src.File = RC_FILE_PRESUB;
556		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
557			return 0;
558	}
559	return 1;
560}
561
562static int peephole_add_presub_add(
563	struct radeon_compiler * c,
564	struct rc_instruction * inst_add)
565{
566	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
567        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
568        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
569
570	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
571		return 0;
572
573	/* src0 and src1 can't have absolute values */
574	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
575	        return 0;
576
577	/* presub_replace_add() assumes only one is negative */
578	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
579	        return 0;
580
581        /* if src0 is negative, at least all bits of dstmask have to be set */
582        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
583	        return 0;
584
585        /* if src1 is negative, at least all bits of dstmask have to be set */
586        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
587	        return 0;
588
589	if (!is_presub_candidate(c, inst_add))
590		return 0;
591
592	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
593		rc_remove_instruction(inst_add);
594		return 1;
595	}
596	return 0;
597}
598
599static void presub_replace_inv(
600	struct rc_instruction * inst_add,
601	struct rc_instruction * inst_reader,
602	unsigned int src_index)
603{
604	/* We must be careful not to modify inst_add, since it
605	 * is possible it will remain part of the program.*/
606	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
607	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
608	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
609	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
610						inst_reader->U.I.PreSub.SrcReg[0]);
611
612	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
613	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
614}
615
616/**
617 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
618 * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
619 * of the add instruction must have the constatnt 1 swizzle.  This function
620 * does not check const registers to see if their value is 1.0, so it should
621 * be called after the constant_folding optimization.
622 * @return
623 * 	0 if the ADD instruction is still part of the program.
624 * 	1 if the ADD instruction is no longer part of the program.
625 */
626static int peephole_add_presub_inv(
627	struct radeon_compiler * c,
628	struct rc_instruction * inst_add)
629{
630	unsigned int i, swz;
631
632	if (!is_presub_candidate(c, inst_add))
633		return 0;
634
635	/* Check if src0 is 1. */
636	/* XXX It would be nice to use is_src_uniform_constant here, but that
637	 * function only works if the register's file is RC_FILE_NONE */
638	for(i = 0; i < 4; i++ ) {
639		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
640		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
641						&& swz != RC_SWIZZLE_ONE) {
642			return 0;
643		}
644	}
645
646	/* Check src1. */
647	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
648						inst_add->U.I.DstReg.WriteMask
649		|| inst_add->U.I.SrcReg[1].Abs
650		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
651			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
652		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
653
654		return 0;
655	}
656
657	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
658		rc_remove_instruction(inst_add);
659		return 1;
660	}
661	return 0;
662}
663
/**
 * State shared between peephole_mul_omod() and omod_filter_reader_cb().
 */
struct peephole_mul_cb_data {
	/* Destination register of the MUL being folded. */
	struct rc_dst_register *Writer;
	/* Set to 1 once an intervening instruction reads that destination. */
	unsigned int Clobbered;
};
668
669static void omod_filter_reader_cb(
670	void * userdata,
671	struct rc_instruction * inst,
672	rc_register_file file,
673	unsigned int index,
674	unsigned int mask)
675{
676	struct peephole_mul_cb_data * d = userdata;
677	if (rc_src_reads_dst_mask(file, mask, index,
678		d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
679
680		d->Clobbered = 1;
681	}
682}
683
684static int peephole_mul_omod(
685	struct radeon_compiler * c,
686	struct rc_instruction * inst_mul,
687	struct rc_list * var_list)
688{
689	unsigned int chan, swz, i;
690	int const_index = -1;
691	int temp_index = -1;
692	float const_value;
693	rc_omod_op omod_op = RC_OMOD_DISABLE;
694	struct rc_list * writer_list;
695	struct rc_variable * var;
696	struct peephole_mul_cb_data cb_data;
697
698	for (i = 0; i < 2; i++) {
699		unsigned int j;
700		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
701			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
702			return 0;
703		}
704		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
705			if (temp_index != -1) {
706				/* The instruction has two temp sources */
707				return 0;
708			} else {
709				temp_index = i;
710				continue;
711			}
712		}
713		/* If we get this far Src[i] must be a constant src */
714		if (inst_mul->U.I.SrcReg[i].Negate) {
715			return 0;
716		}
717		/* The constant src needs to read from the same swizzle */
718		swz = RC_SWIZZLE_UNUSED;
719		chan = 0;
720		for (j = 0; j < 4; j++) {
721			unsigned int j_swz =
722				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
723			if (j_swz == RC_SWIZZLE_UNUSED) {
724				continue;
725			}
726			if (swz == RC_SWIZZLE_UNUSED) {
727				swz = j_swz;
728				chan = j;
729			} else if (j_swz != swz) {
730				return 0;
731			}
732		}
733
734		if (const_index != -1) {
735			/* The instruction has two constant sources */
736			return 0;
737		} else {
738			const_index = i;
739		}
740	}
741
742	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
743				inst_mul->U.I.SrcReg[const_index].Index)) {
744		return 0;
745	}
746	const_value = rc_get_constant_value(c,
747			inst_mul->U.I.SrcReg[const_index].Index,
748			inst_mul->U.I.SrcReg[const_index].Swizzle,
749			inst_mul->U.I.SrcReg[const_index].Negate,
750			chan);
751
752	if (const_value == 2.0f) {
753		omod_op = RC_OMOD_MUL_2;
754	} else if (const_value == 4.0f) {
755		omod_op = RC_OMOD_MUL_4;
756	} else if (const_value == 8.0f) {
757		omod_op = RC_OMOD_MUL_8;
758	} else if (const_value == (1.0f / 2.0f)) {
759		omod_op = RC_OMOD_DIV_2;
760	} else if (const_value == (1.0f / 4.0f)) {
761		omod_op = RC_OMOD_DIV_4;
762	} else if (const_value == (1.0f / 8.0f)) {
763		omod_op = RC_OMOD_DIV_8;
764	} else {
765		return 0;
766	}
767
768	writer_list = rc_variable_list_get_writers_one_reader(var_list,
769		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
770
771	if (!writer_list) {
772		return 0;
773	}
774
775	cb_data.Clobbered = 0;
776	cb_data.Writer = &inst_mul->U.I.DstReg;
777	for (var = writer_list->Item; var; var = var->Friend) {
778		struct rc_instruction * inst;
779		const struct rc_opcode_info * info = rc_get_opcode_info(
780				var->Inst->U.I.Opcode);
781		if (info->HasTexture) {
782			return 0;
783		}
784		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
785			return 0;
786		}
787		for (inst = inst_mul->Prev; inst != var->Inst;
788							inst = inst->Prev) {
789			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
790								&cb_data);
791			if (cb_data.Clobbered) {
792				break;
793			}
794		}
795	}
796
797	if (cb_data.Clobbered) {
798		return 0;
799	}
800
801	/* Rewrite the instructions */
802	for (var = writer_list->Item; var; var = var->Friend) {
803		struct rc_variable * writer = writer_list->Item;
804		writer->Inst->U.I.Omod = omod_op;
805		writer->Inst->U.I.DstReg = inst_mul->U.I.DstReg;
806		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
807	}
808
809	rc_remove_instruction(inst_mul);
810
811	return 1;
812}
813
814/**
815 * @return
816 * 	0 if inst is still part of the program.
817 * 	1 if inst is no longer part of the program.
818 */
819static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
820{
821	switch(inst->U.I.Opcode){
822	case RC_OPCODE_ADD:
823		if (c->has_presub) {
824			if(peephole_add_presub_inv(c, inst))
825				return 1;
826			if(peephole_add_presub_add(c, inst))
827				return 1;
828		}
829		break;
830	default:
831		break;
832	}
833	return 0;
834}
835
836void rc_optimize(struct radeon_compiler * c, void *user)
837{
838	struct rc_instruction * inst = c->Program.Instructions.Next;
839	struct rc_list * var_list;
840	while(inst != &c->Program.Instructions) {
841		struct rc_instruction * cur = inst;
842		inst = inst->Next;
843
844		constant_folding(c, cur);
845
846		if(peephole(c, cur))
847			continue;
848
849		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
850			copy_propagate(c, cur);
851			/* cur may no longer be part of the program */
852		}
853	}
854
855	if (!c->has_omod) {
856		return;
857	}
858
859	inst = c->Program.Instructions.Next;
860	while(inst != &c->Program.Instructions) {
861		struct rc_instruction * cur = inst;
862		inst = inst->Next;
863		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
864			var_list = rc_get_variables(c);
865			peephole_mul_omod(c, cur, var_list);
866		}
867	}
868}
869