1/*
2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23#include "radeon_emulate_branches.h"
24
25#include <stdio.h>
26
27#include "radeon_compiler.h"
28#include "radeon_dataflow.h"
29
/* Compile-time switch for this file's debug output: while it is 0, DBG()
 * prints nothing and handle_endif() does not dump the program. */
#define VERBOSE 0

/* printf-style debug message written to stderr, only when VERBOSE is non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
33
34
/**
 * Proxy bookkeeping for a single register index.
 */
struct proxy_info {
	/* Set once a proxy temporary has been allocated for this register. */
	unsigned int Proxied:1;
	/* Index of the proxy temporary; only meaningful while Proxied is set. */
	unsigned int Index:RC_REGISTER_INDEX_BITS;
};
39
/**
 * Table of proxies for the temporary register file, indexed by the
 * original temporary register index.
 */
struct register_proxies {
	struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
};
43
/**
 * One currently open branch, created by handle_if().
 */
struct branch_info {
	/* The IF instruction that opened this branch. */
	struct rc_instruction * If;
	/* The matching ELSE instruction; stays zero if no ELSE was encountered. */
	struct rc_instruction * Else;
};
48
/**
 * State carried through one run of rc_emulate_branches().
 */
struct emulate_branch_state {
	/* Compiler whose program is being rewritten. */
	struct radeon_compiler * C;

	/* Stack of open branches; the innermost one is the last used entry. */
	struct branch_info * Branches;
	/* Number of entries of Branches currently in use. */
	unsigned int BranchCount;
	/* Bookkeeping for memory_pool_array_reserve(); presumably the allocated
	 * capacity of Branches -- TODO confirm against the memory pool helper. */
	unsigned int BranchReserved;
};
56
57
58static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
59{
60	struct branch_info * branch;
61	struct rc_instruction * inst_mov;
62
63	memory_pool_array_reserve(&s->C->Pool, struct branch_info,
64			s->Branches, s->BranchCount, s->BranchReserved, 1);
65
66	DBG("%s\n", __FUNCTION__);
67
68	branch = &s->Branches[s->BranchCount++];
69	memset(branch, 0, sizeof(struct branch_info));
70	branch->If = inst;
71
72	/* Make a safety copy of the decision register, because we will need
73	 * it at ENDIF time and it might be overwritten in both branches. */
74	inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
75	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
76	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
77	inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
78	inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
79	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
80
81	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
82	inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
83	inst->U.I.SrcReg[0].Swizzle = 0;
84	inst->U.I.SrcReg[0].Abs = 0;
85	inst->U.I.SrcReg[0].Negate = 0;
86}
87
88static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
89{
90	struct branch_info * branch;
91
92	if (!s->BranchCount) {
93		rc_error(s->C, "Encountered ELSE outside of branches");
94		return;
95	}
96
97	DBG("%s\n", __FUNCTION__);
98
99	branch = &s->Branches[s->BranchCount - 1];
100	branch->Else = inst;
101}
102
103
/**
 * Userdata bundle handed to the scan_write() and remap_proxy_function()
 * callbacks.
 */
struct state_and_proxies {
	/* Branch emulation state; gives the callbacks access to the compiler. */
	struct emulate_branch_state * S;
	/* Proxy table being filled in and consulted. */
	struct register_proxies * Proxies;
};
108
109static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
110			rc_register_file file, unsigned int index)
111{
112	if (file == RC_FILE_TEMPORARY) {
113		return &sap->Proxies->Temporary[index];
114	} else {
115		return 0;
116	}
117}
118
119static void scan_write(void * userdata, struct rc_instruction * inst,
120		rc_register_file file, unsigned int index, unsigned int comp)
121{
122	struct state_and_proxies * sap = userdata;
123	struct proxy_info * proxy = get_proxy_info(sap, file, index);
124
125	if (proxy && !proxy->Proxied) {
126		proxy->Proxied = 1;
127		proxy->Index = rc_find_free_temporary(sap->S->C);
128	}
129}
130
131static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
132		rc_register_file * pfile, unsigned int * pindex)
133{
134	struct state_and_proxies * sap = userdata;
135	struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex);
136
137	if (proxy && proxy->Proxied) {
138		*pfile = RC_FILE_TEMPORARY;
139		*pindex = proxy->Index;
140	}
141}
142
143/**
144 * Redirect all writes in the instruction range [begin, end) to proxy
145 * temporary registers.
146 */
147static void allocate_and_insert_proxies(struct emulate_branch_state * s,
148		struct register_proxies * proxies,
149		struct rc_instruction * begin,
150		struct rc_instruction * end)
151{
152	struct state_and_proxies sap;
153
154	sap.S = s;
155	sap.Proxies = proxies;
156
157	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
158		rc_for_all_writes_mask(inst, scan_write, &sap);
159		rc_remap_registers(inst, remap_proxy_function, &sap);
160	}
161
162	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
163		if (proxies->Temporary[index].Proxied) {
164			struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
165			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
166			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
167			inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
168			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
169			inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
170			inst_mov->U.I.SrcReg[0].Index = index;
171		}
172	}
173}
174
175
176static void inject_cmp(struct emulate_branch_state * s,
177		struct rc_instruction * inst_if,
178		struct rc_instruction * inst_endif,
179		rc_register_file file, unsigned int index,
180		struct proxy_info ifproxy,
181		struct proxy_info elseproxy)
182{
183	struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif);
184	inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
185	inst_cmp->U.I.DstReg.File = file;
186	inst_cmp->U.I.DstReg.Index = index;
187	inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
188	inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
189	inst_cmp->U.I.SrcReg[0].Abs = 1;
190	inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
191	inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
192	inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index;
193	inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
194	inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index;
195}
196
197static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
198{
199	struct branch_info * branch;
200	struct register_proxies IfProxies;
201	struct register_proxies ElseProxies;
202
203	if (!s->BranchCount) {
204		rc_error(s->C, "Encountered ENDIF outside of branches");
205		return;
206	}
207
208	DBG("%s\n", __FUNCTION__);
209
210	branch = &s->Branches[s->BranchCount - 1];
211
212	memset(&IfProxies, 0, sizeof(IfProxies));
213	memset(&ElseProxies, 0, sizeof(ElseProxies));
214
215	allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
216
217	if (branch->Else)
218		allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
219
220	/* Insert the CMP instructions at the end. */
221	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
222		if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
223			inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
224					IfProxies.Temporary[index], ElseProxies.Temporary[index]);
225		}
226	}
227
228	/* Remove all traces of the branch instructions */
229	rc_remove_instruction(branch->If);
230	if (branch->Else)
231		rc_remove_instruction(branch->Else);
232	rc_remove_instruction(inst);
233
234	s->BranchCount--;
235
236	if (VERBOSE) {
237		DBG("Program after ENDIF handling:\n");
238		rc_print_program(&s->C->Program);
239	}
240}
241
242
/**
 * Userdata for remap_output_function(): one output register and the
 * temporary that stands in for it.
 */
struct remap_output_data {
	/* Index of the output register to be redirected. */
	unsigned int Output:RC_REGISTER_INDEX_BITS;
	/* Index of the temporary register that replaces it. */
	unsigned int Temporary:RC_REGISTER_INDEX_BITS;
};
247
248static void remap_output_function(void * userdata, struct rc_instruction * inst,
249		rc_register_file * pfile, unsigned int * pindex)
250{
251	struct remap_output_data * data = userdata;
252
253	if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) {
254		*pfile = RC_FILE_TEMPORARY;
255		*pindex = data->Temporary;
256	}
257}
258
259
260/**
261 * Output registers cannot be read from and so cannot be dealt with like
262 * temporary registers.
263 *
264 * We do the simplest thing: If an output registers is written within
265 * a branch, then *all* writes to this register are proxied to a
266 * temporary register, and a final MOV is appended to the end of
267 * the program.
268 */
269static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
270{
271	const struct rc_opcode_info * opcode;
272
273	if (!s->BranchCount)
274		return;
275
276	opcode = rc_get_opcode_info(inst->U.I.Opcode);
277
278	if (!opcode->HasDstReg)
279		return;
280
281	if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
282		struct remap_output_data remap;
283		struct rc_instruction * inst_mov;
284
285		remap.Output = inst->U.I.DstReg.Index;
286		remap.Temporary = rc_find_free_temporary(s->C);
287
288		for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
289		    inst != &s->C->Program.Instructions;
290		    inst = inst->Next) {
291			rc_remap_registers(inst, &remap_output_function, &remap);
292		}
293
294		inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
295		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
296		inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
297		inst_mov->U.I.DstReg.Index = remap.Output;
298		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
299		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
300		inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
301	}
302}
303
304/**
305 * Remove branch instructions; instead, execute both branches
306 * on different register sets and choose between their results
307 * using CMP instructions in place of the original ENDIF.
308 */
309void rc_emulate_branches(struct radeon_compiler *c, void *user)
310{
311	struct emulate_branch_state s;
312	struct rc_instruction * ptr;
313
314	memset(&s, 0, sizeof(s));
315	s.C = c;
316
317	/* Untypical loop because we may remove the current instruction */
318	ptr = c->Program.Instructions.Next;
319	while(ptr != &c->Program.Instructions) {
320		struct rc_instruction * inst = ptr;
321		ptr = ptr->Next;
322
323		if (inst->Type == RC_INSTRUCTION_NORMAL) {
324			switch(inst->U.I.Opcode) {
325			case RC_OPCODE_IF:
326				handle_if(&s, inst);
327				break;
328			case RC_OPCODE_ELSE:
329				handle_else(&s, inst);
330				break;
331			case RC_OPCODE_ENDIF:
332				handle_endif(&s, inst);
333				break;
334			default:
335				fix_output_writes(&s, inst);
336				break;
337			}
338		} else {
339			rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
340		}
341	}
342}
343