1/*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29#include "radeon_program_pair.h"
30
31#include <stdio.h>
32
33#include "main/glheader.h"
34#include "util/register_allocate.h"
35#include "util/u_memory.h"
36#include "util/ralloc.h"
37
38#include "r300_fragprog_swizzle.h"
39#include "radeon_compiler.h"
40#include "radeon_compiler_util.h"
41#include "radeon_dataflow.h"
42#include "radeon_list.h"
43#include "radeon_regalloc.h"
44#include "radeon_variable.h"
45
/* Set to a non-zero value to get register allocation traces on stderr. */
#define VERBOSE 0

/* printf-style trace helper; fprintf() is only reached when VERBOSE is
 * non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while(0)
49
50
51
52struct register_info {
53	struct live_intervals Live[4];
54
55	unsigned int Used:1;
56	unsigned int Allocated:1;
57	unsigned int File:3;
58	unsigned int Index:RC_REGISTER_INDEX_BITS;
59	unsigned int Writemask;
60};
61
/** Working state shared by the allocation passes and their callbacks. */
struct regalloc_state {
	/** Compiler whose program is being allocated. */
	struct radeon_compiler *C;

	/** Per-input bookkeeping; NumInputs entries. */
	struct register_info *Input;
	unsigned NumInputs;

	/** Per-temporary bookkeeping; NumTemporaries entries. */
	struct register_info *Temporary;
	unsigned NumTemporaries;

	/** Non-zero when only the inputs-only allocation is performed, in
	 * which case temporaries are remapped through Temporary[]. */
	unsigned Simple;
	/** Largest IP of a loop end seen so far while scanning the program. */
	int LoopEnd;
};
74
75struct rc_class {
76	enum rc_reg_class ID;
77
78	unsigned int WritemaskCount;
79
80	/** List of writemasks that belong to this class */
81	unsigned int Writemasks[3];
82
83
84};
85
86static const struct rc_class rc_class_list [] = {
87	{RC_REG_CLASS_SINGLE, 3,
88		{RC_MASK_X,
89		 RC_MASK_Y,
90		 RC_MASK_Z}},
91	{RC_REG_CLASS_DOUBLE, 3,
92		{RC_MASK_X | RC_MASK_Y,
93		 RC_MASK_X | RC_MASK_Z,
94		 RC_MASK_Y | RC_MASK_Z}},
95	{RC_REG_CLASS_TRIPLE, 1,
96		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
97		 RC_MASK_NONE,
98		 RC_MASK_NONE}},
99	{RC_REG_CLASS_ALPHA, 1,
100		{RC_MASK_W,
101		 RC_MASK_NONE,
102		 RC_MASK_NONE}},
103	{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
104		{RC_MASK_X | RC_MASK_W,
105		 RC_MASK_Y | RC_MASK_W,
106		 RC_MASK_Z | RC_MASK_W}},
107	{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
108		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
109		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
110		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
111	{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
112		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
113		RC_MASK_NONE,
114		RC_MASK_NONE}},
115	{RC_REG_CLASS_X, 1,
116		{RC_MASK_X,
117		RC_MASK_NONE,
118		RC_MASK_NONE}},
119	{RC_REG_CLASS_Y, 1,
120		{RC_MASK_Y,
121		RC_MASK_NONE,
122		RC_MASK_NONE}},
123	{RC_REG_CLASS_Z, 1,
124		{RC_MASK_Z,
125		RC_MASK_NONE,
126		RC_MASK_NONE}},
127	{RC_REG_CLASS_XY, 1,
128		{RC_MASK_X | RC_MASK_Y,
129		RC_MASK_NONE,
130		RC_MASK_NONE}},
131	{RC_REG_CLASS_YZ, 1,
132		{RC_MASK_Y | RC_MASK_Z,
133		RC_MASK_NONE,
134		RC_MASK_NONE}},
135	{RC_REG_CLASS_XZ, 1,
136		{RC_MASK_X | RC_MASK_Z,
137		RC_MASK_NONE,
138		RC_MASK_NONE}},
139	{RC_REG_CLASS_XW, 1,
140		{RC_MASK_X | RC_MASK_W,
141		RC_MASK_NONE,
142		RC_MASK_NONE}},
143	{RC_REG_CLASS_YW, 1,
144		{RC_MASK_Y | RC_MASK_W,
145		RC_MASK_NONE,
146		RC_MASK_NONE}},
147	{RC_REG_CLASS_ZW, 1,
148		{RC_MASK_Z | RC_MASK_W,
149		RC_MASK_NONE,
150		RC_MASK_NONE}},
151	{RC_REG_CLASS_XYW, 1,
152		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
153		RC_MASK_NONE,
154		RC_MASK_NONE}},
155	{RC_REG_CLASS_YZW, 1,
156		{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
157		RC_MASK_NONE,
158		RC_MASK_NONE}},
159	{RC_REG_CLASS_XZW, 1,
160		{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
161		RC_MASK_NONE,
162		RC_MASK_NONE}}
163};
164
165static void print_live_intervals(struct live_intervals * src)
166{
167	if (!src || !src->Used) {
168		DBG("(null)");
169		return;
170	}
171
172	DBG("(%i,%i)", src->Start, src->End);
173}
174
175static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
176{
177	if (VERBOSE) {
178		DBG("overlap_live_intervals: ");
179		print_live_intervals(a);
180		DBG(" to ");
181		print_live_intervals(b);
182		DBG("\n");
183	}
184
185	if (!a->Used || !b->Used) {
186		DBG("    unused interval\n");
187		return 0;
188	}
189
190	if (a->Start > b->Start) {
191		if (a->Start < b->End) {
192			DBG("    overlap\n");
193			return 1;
194		}
195	} else if (b->Start > a->Start) {
196		if (b->Start < a->End) {
197			DBG("    overlap\n");
198			return 1;
199		}
200	} else { /* a->Start == b->Start */
201		if (a->Start != a->End && b->Start != b->End) {
202			DBG("    overlap\n");
203			return 1;
204		}
205	}
206
207	DBG("    no overlap\n");
208
209	return 0;
210}
211
212static void scan_read_callback(void * data, struct rc_instruction * inst,
213		rc_register_file file, unsigned int index, unsigned int mask)
214{
215	struct regalloc_state * s = data;
216	struct register_info * reg;
217	unsigned int i;
218
219	if (file != RC_FILE_INPUT)
220		return;
221
222	s->Input[index].Used = 1;
223	reg = &s->Input[index];
224
225	for (i = 0; i < 4; i++) {
226		if (!((mask >> i) & 0x1)) {
227			continue;
228		}
229		reg->Live[i].Used = 1;
230		reg->Live[i].Start = 0;
231		reg->Live[i].End =
232			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
233	}
234}
235
236static void remap_register(void * data, struct rc_instruction * inst,
237		rc_register_file * file, unsigned int * index)
238{
239	struct regalloc_state * s = data;
240	const struct register_info * reg;
241
242	if (*file == RC_FILE_TEMPORARY && s->Simple)
243		reg = &s->Temporary[*index];
244	else if (*file == RC_FILE_INPUT)
245		reg = &s->Input[*index];
246	else
247		return;
248
249	if (reg->Allocated) {
250		*index = reg->Index;
251	}
252}
253
254static void alloc_input_simple(void * data, unsigned int input,
255							unsigned int hwreg)
256{
257	struct regalloc_state * s = data;
258
259	if (input >= s->NumInputs)
260		return;
261
262	s->Input[input].Allocated = 1;
263	s->Input[input].File = RC_FILE_TEMPORARY;
264	s->Input[input].Index = hwreg;
265}
266
267/* This functions offsets the temporary register indices by the number
268 * of input registers, because input registers are actually temporaries and
269 * should not occupy the same space.
270 *
271 * This pass is supposed to be used to maintain correct allocation of inputs
272 * if the standard register allocation is disabled. */
273static void do_regalloc_inputs_only(struct regalloc_state * s)
274{
275	for (unsigned i = 0; i < s->NumTemporaries; i++) {
276		s->Temporary[i].Allocated = 1;
277		s->Temporary[i].File = RC_FILE_TEMPORARY;
278		s->Temporary[i].Index = i + s->NumInputs;
279	}
280}
281
282static unsigned int is_derivative(rc_opcode op)
283{
284	return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
285}
286
287static int find_class(
288	const struct rc_class * classes,
289	unsigned int writemask,
290	unsigned int max_writemask_count)
291{
292	unsigned int i;
293	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
294		unsigned int j;
295		if (classes[i].WritemaskCount > max_writemask_count) {
296			continue;
297		}
298		for (j = 0; j < 3; j++) {
299			if (classes[i].Writemasks[j] == writemask) {
300				return i;
301			}
302		}
303	}
304	return -1;
305}
306
/** User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/** Flag that the callback clears when the writemask must stay as is. */
	unsigned *can_change_writemask;
	/** Swizzle applied to each source swizzle before it is checked. */
	unsigned conversion_swizzle;
};
311
312static void variable_get_class_read_cb(
313	void * userdata,
314	struct rc_instruction * inst,
315	struct rc_pair_instruction_arg * arg,
316	struct rc_pair_instruction_source * src)
317{
318	struct variable_get_class_cb_data * d = userdata;
319	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
320							d->conversion_swizzle);
321	if (!r300_swizzle_is_native_basic(new_swizzle)) {
322		*d->can_change_writemask = 0;
323	}
324}
325
326static enum rc_reg_class variable_get_class(
327	struct rc_variable * variable,
328	const struct rc_class * classes)
329{
330	unsigned int i;
331	unsigned int can_change_writemask= 1;
332	unsigned int writemask = rc_variable_writemask_sum(variable);
333	struct rc_list * readers = rc_variable_readers_union(variable);
334	int class_index;
335
336	if (!variable->C->is_r500) {
337		struct rc_class c;
338		struct rc_variable * var_ptr;
339		/* The assumption here is that if an instruction has type
340		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
341		 * r300 and r400 can't swizzle the result of a TEX lookup. */
342		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
343			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
344				writemask = RC_MASK_XYZW;
345			}
346		}
347
348		/* Check if it is possible to do swizzle packing for r300/r400
349		 * without creating non-native swizzles. */
350		class_index = find_class(classes, writemask, 3);
351		if (class_index < 0) {
352			goto error;
353		}
354		c = classes[class_index];
355		if (c.WritemaskCount == 1) {
356			goto done;
357		}
358		for (i = 0; i < c.WritemaskCount; i++) {
359			struct rc_variable * var_ptr;
360			for (var_ptr = variable; var_ptr;
361						var_ptr = var_ptr->Friend) {
362				int j;
363				unsigned int conversion_swizzle =
364						rc_make_conversion_swizzle(
365						writemask, c.Writemasks[i]);
366				struct variable_get_class_cb_data d;
367				d.can_change_writemask = &can_change_writemask;
368				d.conversion_swizzle = conversion_swizzle;
369				/* If we get this far var_ptr->Inst has to
370				 * be a pair instruction.  If variable or any
371				 * of its friends are normal instructions,
372				 * then the writemask will be set to RC_MASK_XYZW
373				 * and the function will return before it gets
374				 * here. */
375				rc_pair_for_all_reads_arg(var_ptr->Inst,
376					variable_get_class_read_cb, &d);
377
378				for (j = 0; j < var_ptr->ReaderCount; j++) {
379					unsigned int old_swizzle;
380					unsigned int new_swizzle;
381					struct rc_reader r = var_ptr->Readers[j];
382					if (r.Inst->Type ==
383							RC_INSTRUCTION_PAIR ) {
384						old_swizzle = r.U.P.Arg->Swizzle;
385					} else {
386						/* Source operands of TEX
387						 * instructions can't be
388						 * swizzle on r300/r400 GPUs.
389						 */
390						can_change_writemask = 0;
391						break;
392					}
393					new_swizzle = rc_adjust_channels(
394						old_swizzle, conversion_swizzle);
395					if (!r300_swizzle_is_native_basic(
396								new_swizzle)) {
397						can_change_writemask = 0;
398						break;
399					}
400				}
401				if (!can_change_writemask) {
402					break;
403				}
404			}
405			if (!can_change_writemask) {
406				break;
407			}
408		}
409	}
410
411	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
412		/* DDX/DDY seem to always fail when their writemasks are
413		 * changed.*/
414		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
415		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
416			can_change_writemask = 0;
417		}
418	}
419	for ( ; readers; readers = readers->Next) {
420		struct rc_reader * r = readers->Item;
421		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
422			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
423				can_change_writemask = 0;
424				break;
425			}
426			/* DDX/DDY also fail when their swizzles are changed. */
427			if (is_derivative(r->Inst->U.P.RGB.Opcode)
428			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
429				can_change_writemask = 0;
430				break;
431			}
432		}
433	}
434
435	class_index = find_class(classes, writemask,
436						can_change_writemask ? 3 : 1);
437done:
438	if (class_index > -1) {
439		return classes[class_index].ID;
440	} else {
441error:
442		rc_error(variable->C,
443				"Could not find class for index=%u mask=%u\n",
444				variable->Dst.Index, writemask);
445		return 0;
446	}
447}
448
449static unsigned int overlap_live_intervals_array(
450	struct live_intervals * a,
451	struct live_intervals * b)
452{
453	unsigned int a_chan, b_chan;
454	for (a_chan = 0; a_chan < 4; a_chan++) {
455		for (b_chan = 0; b_chan < 4; b_chan++) {
456			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
457					return 1;
458			}
459		}
460	}
461	return 0;
462}
463
464static unsigned int reg_get_index(int reg)
465{
466	return reg / RC_MASK_XYZW;
467}
468
469static unsigned int reg_get_writemask(int reg)
470{
471	return (reg % RC_MASK_XYZW) + 1;
472}
473
474static int get_reg_id(unsigned int index, unsigned int writemask)
475{
476	assert(writemask);
477	if (writemask == 0) {
478		return 0;
479	}
480	return (index * RC_MASK_XYZW) + (writemask - 1);
481}
482
#if VERBOSE
/** Print a register id to stderr as "Temp[index].xyzw", with '_' in place
 * of every channel that is not part of the writemask. */
static void print_reg(int reg)
{
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", reg_get_index(reg), x, y, z, w);
}
#endif
495
496static void add_register_conflicts(
497	struct ra_regs * regs,
498	unsigned int max_temp_regs)
499{
500	unsigned int index, a_mask, b_mask;
501	for (index = 0; index < max_temp_regs; index++) {
502		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
503			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
504								b_mask++) {
505				if (a_mask & b_mask) {
506					ra_add_reg_conflict(regs,
507						get_reg_id(index, a_mask),
508						get_reg_id(index, b_mask));
509				}
510			}
511		}
512	}
513}
514
515static void do_advanced_regalloc(struct regalloc_state * s)
516{
517
518	unsigned int i, input_node, node_count, node_index;
519	unsigned int * node_classes;
520	struct rc_instruction * inst;
521	struct rc_list * var_ptr;
522	struct rc_list * variables;
523	struct ra_graph * graph;
524	const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
525
526	/* Get list of program variables */
527	variables = rc_get_variables(s->C);
528	node_count = rc_list_count(variables);
529	node_classes = memory_pool_malloc(&s->C->Pool,
530			node_count * sizeof(unsigned int));
531
532	for (var_ptr = variables, node_index = 0; var_ptr;
533					var_ptr = var_ptr->Next, node_index++) {
534		unsigned int class_index;
535		/* Compute the live intervals */
536		rc_variable_compute_live_intervals(var_ptr->Item);
537
538		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
539		node_classes[node_index] = ra_state->class_ids[class_index];
540	}
541
542
543	/* Calculate live intervals for input registers */
544	for (inst = s->C->Program.Instructions.Next;
545					inst != &s->C->Program.Instructions;
546					inst = inst->Next) {
547		rc_opcode op = rc_get_flow_control_inst(inst);
548		if (op == RC_OPCODE_BGNLOOP) {
549			struct rc_instruction * endloop =
550							rc_match_bgnloop(inst);
551			if (endloop->IP > s->LoopEnd) {
552				s->LoopEnd = endloop->IP;
553			}
554		}
555		rc_for_all_reads_mask(inst, scan_read_callback, s);
556	}
557
558	/* Compute the writemask for inputs. */
559	for (i = 0; i < s->NumInputs; i++) {
560		unsigned int chan, writemask = 0;
561		for (chan = 0; chan < 4; chan++) {
562			if (s->Input[i].Live[chan].Used) {
563				writemask |= (1 << chan);
564			}
565		}
566		s->Input[i].Writemask = writemask;
567	}
568
569	graph = ra_alloc_interference_graph(ra_state->regs,
570						node_count + s->NumInputs);
571
572	for (node_index = 0; node_index < node_count; node_index++) {
573		ra_set_node_class(graph, node_index, node_classes[node_index]);
574	}
575
576	/* Build the interference graph */
577	for (var_ptr = variables, node_index = 0; var_ptr;
578					var_ptr = var_ptr->Next,node_index++) {
579		struct rc_list * a, * b;
580		unsigned int b_index;
581
582		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
583						b; b = b->Next, b_index++) {
584			struct rc_variable * var_a = a->Item;
585			while (var_a) {
586				struct rc_variable * var_b = b->Item;
587				while (var_b) {
588					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
589						ra_add_node_interference(graph,
590							node_index, b_index);
591					}
592					var_b = var_b->Friend;
593				}
594				var_a = var_a->Friend;
595			}
596		}
597	}
598
599	/* Add input registers to the interference graph */
600	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
601		if (!s->Input[i].Writemask) {
602			continue;
603		}
604		for (var_ptr = variables, node_index = 0;
605				var_ptr; var_ptr = var_ptr->Next, node_index++) {
606			struct rc_variable * var = var_ptr->Item;
607			if (overlap_live_intervals_array(s->Input[i].Live,
608								var->Live)) {
609				ra_add_node_interference(graph, node_index,
610						node_count + input_node);
611			}
612		}
613		/* Manually allocate a register for this input */
614		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
615				s->Input[i].Index, s->Input[i].Writemask));
616		input_node++;
617	}
618
619	if (!ra_allocate(graph)) {
620		rc_error(s->C, "Ran out of hardware temporaries\n");
621		return;
622	}
623
624	/* Rewrite the registers */
625	for (var_ptr = variables, node_index = 0; var_ptr;
626				var_ptr = var_ptr->Next, node_index++) {
627		int reg = ra_get_node_reg(graph, node_index);
628		unsigned int writemask = reg_get_writemask(reg);
629		unsigned int index = reg_get_index(reg);
630		struct rc_variable * var = var_ptr->Item;
631
632		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
633			writemask = rc_variable_writemask_sum(var);
634		}
635
636		if (var->Dst.File == RC_FILE_INPUT) {
637			continue;
638		}
639		rc_variable_change_dst(var, index, writemask);
640	}
641
642	ralloc_free(graph);
643}
644
645void rc_init_regalloc_state(struct rc_regalloc_state *s)
646{
647	unsigned i, j, index;
648	unsigned **ra_q_values;
649
650	/* Pre-computed q values.  This array describes the maximum number of
651	 * a class's [row] registers that are in conflict with a single
652	 * register from another class [column].
653	 *
654	 * For example:
655	 * q_values[0][2] is 3, because a register from class 2
656	 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
657	 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
658	 * and T0.z.
659	 *
660	 * q_values[2][0] is 1, because a register from class 0
661	 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
662	 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
663	 *
664	 * The q values for each register class [row] will never be greater
665	 * than the maximum number of writemask combinations for that class.
666	 *
667	 * For example:
668	 *
669	 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
670	 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
671	 * than 1.
672	 */
673	const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
674	{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
675	{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
676	{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
677	{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
678	{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
679	{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
680	{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
681	{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
682	{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
683	{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
684	{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
685	{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
686	{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
687	{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
688	{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
689	{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
690	{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
691	{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
692	{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
693	};
694
695	/* Allocate the main ra data structure */
696	s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,
697                                   true);
698
699	/* Create the register classes */
700	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
701		const struct rc_class *class = &rc_class_list[i];
702		s->class_ids[class->ID] = ra_alloc_reg_class(s->regs);
703
704		/* Assign registers to the classes */
705		for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
706			for (j = 0; j < class->WritemaskCount; j++) {
707				int reg_id = get_reg_id(index,
708						class->Writemasks[j]);
709				ra_class_add_reg(s->regs,
710					s->class_ids[class->ID], reg_id);
711			}
712		}
713	}
714
715	/* Set the q values.  The q_values array is indexed based on
716	 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
717	 * different than the ID assigned to that class by ra.
718	 * This why we need to manually construct this list.
719	 */
720	ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));
721
722	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
723		ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
724		for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
725			ra_q_values[s->class_ids[i]][s->class_ids[j]] =
726							q_values[i][j];
727		}
728	}
729
730	/* Add register conflicts */
731	add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);
732
733	ra_set_finalize(s->regs, ra_q_values);
734
735	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
736		FREE(ra_q_values[i]);
737	}
738	FREE(ra_q_values);
739}
740
741void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
742{
743	ralloc_free(s->regs);
744}
745
746/**
747 * @param user This parameter should be a pointer to an integer value.  If this
748 * integer value is zero, then a simple register allocator will be used that
749 * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
750 * user is non-zero, then the regular register allocator will be used
751 * (\sa do_regalloc).
752  */
753void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
754{
755	struct r300_fragment_program_compiler *c =
756				(struct r300_fragment_program_compiler*)cc;
757	struct regalloc_state s;
758	int * do_full_regalloc = (int*)user;
759
760	memset(&s, 0, sizeof(s));
761	s.C = cc;
762	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
763	s.Input = memory_pool_malloc(&cc->Pool,
764			s.NumInputs * sizeof(struct register_info));
765	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
766
767	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
768	s.Temporary = memory_pool_malloc(&cc->Pool,
769			s.NumTemporaries * sizeof(struct register_info));
770	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
771
772	rc_recompute_ips(s.C);
773
774	c->AllocateHwInputs(c, &alloc_input_simple, &s);
775	if (*do_full_regalloc) {
776		do_advanced_regalloc(&s);
777	} else {
778		s.Simple = 1;
779		do_regalloc_inputs_only(&s);
780	}
781
782	/* Rewrite inputs and if we are doing the simple allocation, rewrite
783	 * temporaries too. */
784	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
785					inst != &s.C->Program.Instructions;
786					inst = inst->Next) {
787		rc_remap_registers(inst, &remap_register, &s);
788	}
789}
790