/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_dataflow.h"
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler.h"
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler_util.h"
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_list.h"
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_swizzle.h"
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_variable.h"
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct src_clobbered_reads_cb_data {
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_register_file File;
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int Index;
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int Mask;
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data * ReaderData;
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/**
 * Callback type taking two instructions and an unsigned integer, returning
 * nothing.
 * NOTE(review): the parameter roles are unnamed and no user of this type is
 * visible in this part of the file -- confirm against its callers.
 */
typedef void (*rc_presub_replace_fn)(
	struct rc_instruction *,
	struct rc_instruction *,
	unsigned int);
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_src_register combine;
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	combine.File = inner.File;
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	combine.Index = inner.Index;
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	combine.RelAddr = inner.RelAddr;
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (outer.Abs) {
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		combine.Abs = 1;
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		combine.Negate = outer.Negate;
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		combine.Abs = inner.Abs;
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		combine.Negate ^= outer.Negate;
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return combine;
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						struct rc_src_register * src)
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_register_file file = src->File;
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data * reader_data = data;
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if(!rc_inst_can_use_presub(inst,
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				reader_data->Writer->U.I.PreSub.Opcode,
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				rc_swizzle_to_writemask(src->Swizzle),
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				src,
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				&reader_data->Writer->U.I.PreSub.SrcReg[0],
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader_data->Abort = 1;
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* XXX This could probably be handled better. */
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (file == RC_FILE_ADDRESS) {
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader_data->Abort = 1;
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* These instructions cannot read from the constants file.
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * see radeonTransformTEX()
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 */
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				(inst->U.I.Opcode == RC_OPCODE_TEX ||
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.Opcode == RC_OPCODE_TXB ||
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.Opcode == RC_OPCODE_TXP ||
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.Opcode == RC_OPCODE_TXD ||
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.Opcode == RC_OPCODE_TXL ||
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.Opcode == RC_OPCODE_KIL)){
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader_data->Abort = 1;
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void src_clobbered_reads_cb(
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void * data,
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_src_register * src)
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct src_clobbered_reads_cb_data * sc_data = data;
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (src->File == sc_data->File
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    && src->Index == sc_data->Index
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void is_src_clobbered_scan_write(
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void * data,
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_register_file file,
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int index,
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int mask)
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct src_clobbered_reads_cb_data sc_data;
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data * reader_data = data;
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	sc_data.File = file;
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	sc_data.Index = index;
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	sc_data.Mask = mask;
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	sc_data.ReaderData = reader_data;
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_for_all_reads_src(reader_data->Writer,
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					src_clobbered_reads_cb, &sc_data);
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data reader_data;
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    inst_mov->U.I.WriteALUResult ||
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    inst_mov->U.I.SaturateMode)
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Get a list of all the readers of this MOV instruction. */
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	reader_data.ExitOnAbort = 1;
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_get_readers(c, inst_mov, &reader_data,
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		       copy_propagate_scan_read, NULL,
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		       is_src_clobbered_scan_write);
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (reader_data.Abort || reader_data.ReaderCount == 0)
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Propagate the MOV instruction. */
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < reader_data.ReaderCount; i++) {
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_instruction * inst = reader_data.Readers[i].Inst;
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.PreSub = inst_mov->U.I.PreSub;
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Finally, remove the original MOV instruction */
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_remove_instruction(inst_mov);
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Check if a source register is actually always the same
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * swizzle constant.
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int is_src_uniform_constant(struct rc_src_register src,
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_swizzle * pswz, unsigned int * pnegate)
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int have_used = 0;
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (src.File != RC_FILE_NONE) {
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		*pswz = 0;
183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(unsigned int chan = 0; chan < 4; ++chan) {
187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int swz = GET_SWZ(src.Swizzle, chan);
188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz < 4) {
189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*pswz = 0;
190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_UNUSED)
193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!have_used) {
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*pswz = swz;
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*pnegate = GET_BIT(src.Negate, chan);
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			have_used = 1;
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else {
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				*pswz = 0;
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return 0;
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 1;
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding_mad(struct rc_instruction * inst)
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_swizzle swz = 0;
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int negate= 0;
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ZERO) {
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MUL;
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ONE) {
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_ADD;
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (negate)
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else if (swz == RC_SWIZZLE_ZERO) {
230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ONE) {
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_ADD;
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (negate)
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else if (swz == RC_SWIZZLE_ZERO) {
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding_mul(struct rc_instruction * inst)
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_swizzle swz = 0;
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int negate = 0;
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ONE) {
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (negate)
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else if (swz == RC_SWIZZLE_ZERO) {
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ONE) {
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (negate)
274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else if (swz == RC_SWIZZLE_ZERO) {
277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding_add(struct rc_instruction * inst)
285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_swizzle swz = 0;
287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int negate = 0;
288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ZERO) {
291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ZERO) {
299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.Opcode = RC_OPCODE_MOV;
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Replace 0.0, 1.0 and 0.5 immediate constants by their
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * respective swizzles. Simplify instructions like ADD dst, src, 0;
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_constant * constant;
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_src_register newsrc;
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		int have_real_reference;
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int chan;
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (chan = 0; chan < 4; ++chan)
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				break;
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (chan == 4) {
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* Convert immediates to swizzles. */
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		    inst->U.I.SrcReg[src].RelAddr ||
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		constant =
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (constant->Type != RC_CONSTANT_IMMEDIATE)
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		newsrc = inst->U.I.SrcReg[src];
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		have_real_reference = 0;
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (chan = 0; chan < 4; ++chan) {
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			unsigned int newswz;
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			float imm;
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			float baseimm;
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (swz >= 4)
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				continue;
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			imm = constant->u.Immediate[swz];
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			baseimm = imm;
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (imm < 0.0)
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				baseimm = -baseimm;
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (baseimm == 0.0) {
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				newswz = RC_SWIZZLE_ZERO;
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			} else if (baseimm == 1.0) {
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				newswz = RC_SWIZZLE_ONE;
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			} else if (baseimm == 0.5 && c->has_half_swizzles) {
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				newswz = RC_SWIZZLE_HALF;
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			} else {
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				have_real_reference = 1;
366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				continue;
367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			SET_SWZ(newsrc.Swizzle, chan, newswz);
370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (imm < 0.0 && !newsrc.Abs)
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				newsrc.Negate ^= 1 << chan;
372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!have_real_reference) {
375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			newsrc.File = RC_FILE_NONE;
376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			newsrc.Index = 0;
377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* don't make the swizzle worse */
380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst->U.I.SrcReg[src] = newsrc;
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Simplify instructions based on constants */
388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst->U.I.Opcode == RC_OPCODE_MAD)
389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		constant_folding_mad(inst);
390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* note: MAD can simplify to MUL or ADD */
392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst->U.I.Opcode == RC_OPCODE_MUL)
393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		constant_folding_mul(inst);
394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		constant_folding_add(inst);
396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* In case this instruction has been converted, make sure all of the
398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * registers that are no longer used are empty. */
399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	opcode = rc_get_opcode_info(inst->U.I.Opcode);
400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(i = opcode->NumSrcRegs; i < 3; i++) {
401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * If src and dst use the same register, this function returns a writemask that
407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * indicates wich components are read by src.  Otherwise zero is returned.
408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic unsigned int src_reads_dst_mask(struct rc_src_register src,
410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						struct rc_dst_register dst)
411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (dst.File != src.File || dst.Index != src.Index) {
413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return rc_swizzle_to_writemask(src.Swizzle);
416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * in any of its channels.  Return 0 otherwise. */
420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int src_has_const_swz(struct rc_src_register src) {
421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int chan;
422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(chan = 0; chan < 4; chan++) {
423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int swz = GET_SWZ(src.Swizzle, chan);
424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						|| swz == RC_SWIZZLE_ONE) {
426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 1;
427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 0;
430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_scan_read(
433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void * data,
434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_src_register * src)
436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data * reader_data = data;
438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_presubtract_op * presub_opcode = reader_data->CbData;
439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!rc_inst_can_use_presub(inst, *presub_opcode,
441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			reader_data->Writer->U.I.DstReg.WriteMask,
442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			src,
443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			&reader_data->Writer->U.I.SrcReg[0],
444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			&reader_data->Writer->U.I.SrcReg[1])) {
445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader_data->Abort = 1;
446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int presub_helper(
451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct radeon_compiler * c,
452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_add,
453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_presubtract_op presub_opcode,
454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_presub_replace_fn presub_replace)
455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data reader_data;
457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_presubtract_op cb_op = presub_opcode;
459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	reader_data.CbData = &cb_op;
461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	reader_data.ExitOnAbort = 1;
462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						is_src_clobbered_scan_write);
464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (reader_data.Abort || reader_data.ReaderCount == 0)
466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(i = 0; i < reader_data.ReaderCount; i++) {
469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int src_index;
470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_reader reader = reader_data.Readers[i];
471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		const struct rc_opcode_info * info =
472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				rc_get_opcode_info(reader.Inst->U.I.Opcode);
473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				presub_replace(inst_add, reader.Inst, src_index);
477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 1;
480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* This function assumes that inst_add->U.I.SrcReg[0] and
483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * inst_add->U.I.SrcReg[1] aren't both negative. */
484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_replace_add(
485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_add,
486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_reader,
487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int src_index)
488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_presubtract_op presub_opcode;
490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		presub_opcode = RC_PRESUB_SUB;
492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	else
493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		presub_opcode = RC_PRESUB_ADD;
494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst_add->U.I.SrcReg[1].Negate) {
496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.PreSub.Opcode = presub_opcode;
505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.SrcReg[src_index] =
506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					inst_reader->U.I.PreSub.SrcReg[0]);
508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int is_presub_candidate(
513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct radeon_compiler * c,
514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst)
515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int is_constant[2] = {0, 0};
519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			|| inst->U.I.SaturateMode
524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			|| inst->U.I.WriteALUResult
525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			|| inst->U.I.Omod) {
526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* If both sources use a constant swizzle, then we can't convert it to
530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * a presubtract operation.  In fact for the ADD and SUB presubtract
531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * operations neither source can contain a constant swizzle.  This
532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * specific case is checked in peephole_add_presub_add() when
533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * we make sure the swizzles for both sources are equal, so we
534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * don't need to worry about it here. */
535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < 2; i++) {
536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		int chan;
537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (chan = 0; chan < 4; chan++) {
538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			rc_swizzle swz =
539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (swz == RC_SWIZZLE_ONE
541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					|| swz == RC_SWIZZLE_ZERO
542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					|| swz == RC_SWIZZLE_HALF) {
543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				is_constant[i] = 1;
544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (is_constant[0] && is_constant[1])
548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(i = 0; i < info->NumSrcRegs; i++) {
551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_src_register src = inst->U.I.SrcReg[i];
552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (src_reads_dst_mask(src, inst->U.I.DstReg))
553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		src.File = RC_FILE_PRESUB;
556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 1;
560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole_add_presub_add(
563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct radeon_compiler * c,
564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_add)
565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* src0 and src1 can't have absolute values */
574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	        return 0;
576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* presub_replace_add() assumes only one is negative */
578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	        return 0;
580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* if src0 is negative, at least all bits of dstmask have to be set */
582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	        return 0;
584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        /* if src1 is negative, at least all bits of dstmask have to be set */
586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	        return 0;
588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!is_presub_candidate(c, inst_add))
590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_remove_instruction(inst_add);
594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 1;
595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 0;
597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_replace_inv(
600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_add,
601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_reader,
602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int src_index)
603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* We must be careful not to modify inst_add, since it
605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * is possible it will remain part of the program.*/
606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						inst_reader->U.I.PreSub.SrcReg[0]);
611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the add instruction must have the constatnt 1 swizzle.  This function
620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * does not check const registers to see if their value is 1.0, so it should
621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * be called after the constant_folding optimization.
622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return
623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 	0 if the ADD instruction is still part of the program.
624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 	1 if the ADD instruction is no longer part of the program.
625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole_add_presub_inv(
627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct radeon_compiler * c,
628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_add)
629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i, swz;
631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!is_presub_candidate(c, inst_add))
633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Check if src0 is 1. */
636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* XXX It would be nice to use is_src_uniform_constant here, but that
637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * function only works if the register's file is RC_FILE_NONE */
638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(i = 0; i < 4; i++ ) {
639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						&& swz != RC_SWIZZLE_ONE) {
642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Check src1. */
647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						inst_add->U.I.DstReg.WriteMask
649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		|| inst_add->U.I.SrcReg[1].Abs
650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_remove_instruction(inst_add);
659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 1;
660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 0;
662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/* State shared with omod_filter_reader_cb() and omod_filter_writer_cb(). */
struct peephole_mul_cb_data {
	/* Destination register the callbacks compare reads and writes against. */
	struct rc_dst_register * Writer;
	/* Set to 1 by either callback when it finds a match with Writer. */
	unsigned int Clobbered;
};
668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void omod_filter_reader_cb(
670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void * userdata,
671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_register_file file,
673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int index,
674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int mask)
675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct peephole_mul_cb_data * d = userdata;
677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (rc_src_reads_dst_mask(file, mask, index,
678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		d->Clobbered = 1;
681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void omod_filter_writer_cb(
685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void * userdata,
686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_register_file file,
688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int index,
689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int mask)
690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct peephole_mul_cb_data * d = userdata;
692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (file == d->Writer->File && index == d->Writer->Index &&
693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					(mask & d->Writer->WriteMask)) {
694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		d->Clobbered = 1;
695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole_mul_omod(
699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct radeon_compiler * c,
700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_mul,
701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_list * var_list)
702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int chan = 0, swz, i;
704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int const_index = -1;
705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int temp_index = -1;
706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	float const_value;
707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_omod_op omod_op = RC_OMOD_DISABLE;
708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_list * writer_list;
709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_variable * var;
710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct peephole_mul_cb_data cb_data;
711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < 2; i++) {
713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int j;
714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (temp_index != -1) {
720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				/* The instruction has two temp sources */
721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return 0;
722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			} else {
723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				temp_index = i;
724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				continue;
725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* If we get this far Src[i] must be a constant src */
728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (inst_mul->U.I.SrcReg[i].Negate) {
729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* The constant src needs to read from the same swizzle */
732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		swz = RC_SWIZZLE_UNUSED;
733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		chan = 0;
734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (j = 0; j < 4; j++) {
735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			unsigned int j_swz =
736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (j_swz == RC_SWIZZLE_UNUSED) {
738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				continue;
739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (swz == RC_SWIZZLE_UNUSED) {
741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				swz = j_swz;
742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				chan = j;
743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			} else if (j_swz != swz) {
744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return 0;
745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (const_index != -1) {
749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			/* The instruction has two constant sources */
750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else {
752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			const_index = i;
753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				inst_mul->U.I.SrcReg[const_index].Index)) {
758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	const_value = rc_get_constant_value(c,
761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst_mul->U.I.SrcReg[const_index].Index,
762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst_mul->U.I.SrcReg[const_index].Swizzle,
763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst_mul->U.I.SrcReg[const_index].Negate,
764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			chan);
765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (const_value == 2.0f) {
767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		omod_op = RC_OMOD_MUL_2;
768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else if (const_value == 4.0f) {
769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		omod_op = RC_OMOD_MUL_4;
770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else if (const_value == 8.0f) {
771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		omod_op = RC_OMOD_MUL_8;
772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else if (const_value == (1.0f / 2.0f)) {
773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		omod_op = RC_OMOD_DIV_2;
774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else if (const_value == (1.0f / 4.0f)) {
775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		omod_op = RC_OMOD_DIV_4;
776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else if (const_value == (1.0f / 8.0f)) {
777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		omod_op = RC_OMOD_DIV_8;
778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	writer_list = rc_variable_list_get_writers_one_reader(var_list,
783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!writer_list) {
786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	cb_data.Clobbered = 0;
790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	cb_data.Writer = &inst_mul->U.I.DstReg;
791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (var = writer_list->Item; var; var = var->Friend) {
792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_instruction * inst;
793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		const struct rc_opcode_info * info = rc_get_opcode_info(
794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				var->Inst->U.I.Opcode);
795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (info->HasTexture) {
796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (inst = inst_mul->Prev; inst != var->Inst;
802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org							inst = inst->Prev) {
803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org								&cb_data);
805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org								&cb_data);
807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (cb_data.Clobbered) {
808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				break;
809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (cb_data.Clobbered) {
814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Rewrite the instructions */
818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (var = writer_list->Item; var; var = var->Friend) {
819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_variable * writer = writer_list->Item;
820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned conversion_swizzle = rc_make_conversion_swizzle(
821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					writer->Inst->U.I.DstReg.WriteMask,
822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					inst_mul->U.I.DstReg.WriteMask);
823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		writer->Inst->U.I.Omod = omod_op;
824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_remove_instruction(inst_mul);
831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 1;
833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return
837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 	0 if inst is still part of the program.
838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 	1 if inst is no longer part of the program.
839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	switch(inst->U.I.Opcode){
843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_ADD:
844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (c->has_presub) {
845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if(peephole_add_presub_inv(c, inst))
846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return 1;
847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if(peephole_add_presub_add(c, inst))
848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return 1;
849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		break;
851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	default:
852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		break;
853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 0;
855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid rc_optimize(struct radeon_compiler * c, void *user)
858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst = c->Program.Instructions.Next;
860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_list * var_list;
861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(inst != &c->Program.Instructions) {
862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_instruction * cur = inst;
863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst = inst->Next;
864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		constant_folding(c, cur);
866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if(peephole(c, cur))
868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			copy_propagate(c, cur);
872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			/* cur may no longer be part of the program */
873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!c->has_omod) {
877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst = c->Program.Instructions.Next;
881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(inst != &c->Program.Instructions) {
882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_instruction * cur = inst;
883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst = inst->Next;
884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			var_list = rc_get_variables(c);
886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			peephole_mul_omod(c, cur, var_list);
887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
890