1/*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29#include "radeon_program_pair.h"
30
31#include <stdio.h>
32
33#include "main/glheader.h"
34#include "program/register_allocate.h"
35#include "ralloc.h"
36
37#include "r300_fragprog_swizzle.h"
38#include "radeon_compiler.h"
39#include "radeon_compiler_util.h"
40#include "radeon_dataflow.h"
41#include "radeon_list.h"
42#include "radeon_variable.h"
43
/* Compile-time switch: set to a non-zero value to enable DBG() tracing. */
#define VERBOSE 0

/* printf-style trace to stderr; the call is only made when VERBOSE is
 * non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) { \
			fprintf(stderr, __VA_ARGS__); \
		} \
	} while (0)
47
48
49
50struct register_info {
51	struct live_intervals Live[4];
52
53	unsigned int Used:1;
54	unsigned int Allocated:1;
55	unsigned int File:3;
56	unsigned int Index:RC_REGISTER_INDEX_BITS;
57	unsigned int Writemask;
58};
59
/** State shared by the allocation passes and their callbacks. */
struct regalloc_state {
	/** Compiler whose program is being allocated. */
	struct radeon_compiler *C;

	/** Per-input book-keeping, NumInputs entries. */
	struct register_info *Input;
	unsigned NumInputs;

	/** Per-temporary book-keeping, NumTemporaries entries. */
	struct register_info *Temporary;
	unsigned NumTemporaries;

	/** Non-zero when the inputs-only allocator ran; remap_register()
	 * rewrites temporaries only in that case. */
	unsigned Simple;
	/** Largest IP of a loop end matched so far; input live intervals
	 * are extended at least up to this point. */
	int LoopEnd;
};
72
/**
 * Register classes known to the allocator.
 *
 * The numeric value of each enumerator is used as an index into the class
 * table built in do_advanced_regalloc(), so the order here has to match the
 * order of that table.
 */
enum rc_reg_class {
	/* Classes with several interchangeable writemasks. */
	RC_REG_CLASS_SINGLE = 0,
	RC_REG_CLASS_DOUBLE,
	RC_REG_CLASS_TRIPLE,
	RC_REG_CLASS_ALPHA,
	RC_REG_CLASS_SINGLE_PLUS_ALPHA,
	RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
	RC_REG_CLASS_TRIPLE_PLUS_ALPHA,

	/* Classes pinned to exactly one writemask. */
	RC_REG_CLASS_X,
	RC_REG_CLASS_Y,
	RC_REG_CLASS_Z,
	RC_REG_CLASS_XY,
	RC_REG_CLASS_YZ,
	RC_REG_CLASS_XZ,
	RC_REG_CLASS_XW,
	RC_REG_CLASS_YW,
	RC_REG_CLASS_ZW,
	RC_REG_CLASS_XYW,
	RC_REG_CLASS_YZW,
	RC_REG_CLASS_XZW,

	/* Number of classes; not a class itself. */
	RC_REG_CLASS_COUNT
};
95
96struct rc_class {
97	enum rc_reg_class Class;
98
99	unsigned int WritemaskCount;
100
101	/** This is 1 if this class is being used by the register allocator
102	 * and 0 otherwise */
103	unsigned int Used;
104
105	/** This is the ID number assigned to this class by ra. */
106	unsigned int Id;
107
108	/** List of writemasks that belong to this class */
109	unsigned int Writemasks[3];
110
111
112};
113
114static void print_live_intervals(struct live_intervals * src)
115{
116	if (!src || !src->Used) {
117		DBG("(null)");
118		return;
119	}
120
121	DBG("(%i,%i)", src->Start, src->End);
122}
123
124static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
125{
126	if (VERBOSE) {
127		DBG("overlap_live_intervals: ");
128		print_live_intervals(a);
129		DBG(" to ");
130		print_live_intervals(b);
131		DBG("\n");
132	}
133
134	if (!a->Used || !b->Used) {
135		DBG("    unused interval\n");
136		return 0;
137	}
138
139	if (a->Start > b->Start) {
140		if (a->Start < b->End) {
141			DBG("    overlap\n");
142			return 1;
143		}
144	} else if (b->Start > a->Start) {
145		if (b->Start < a->End) {
146			DBG("    overlap\n");
147			return 1;
148		}
149	} else { /* a->Start == b->Start */
150		if (a->Start != a->End && b->Start != b->End) {
151			DBG("    overlap\n");
152			return 1;
153		}
154	}
155
156	DBG("    no overlap\n");
157
158	return 0;
159}
160
161static void scan_read_callback(void * data, struct rc_instruction * inst,
162		rc_register_file file, unsigned int index, unsigned int mask)
163{
164	struct regalloc_state * s = data;
165	struct register_info * reg;
166	unsigned int i;
167
168	if (file != RC_FILE_INPUT)
169		return;
170
171	s->Input[index].Used = 1;
172	reg = &s->Input[index];
173
174	for (i = 0; i < 4; i++) {
175		if (!((mask >> i) & 0x1)) {
176			continue;
177		}
178		reg->Live[i].Used = 1;
179		reg->Live[i].Start = 0;
180		reg->Live[i].End =
181			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
182	}
183}
184
185static void remap_register(void * data, struct rc_instruction * inst,
186		rc_register_file * file, unsigned int * index)
187{
188	struct regalloc_state * s = data;
189	const struct register_info * reg;
190
191	if (*file == RC_FILE_TEMPORARY && s->Simple)
192		reg = &s->Temporary[*index];
193	else if (*file == RC_FILE_INPUT)
194		reg = &s->Input[*index];
195	else
196		return;
197
198	if (reg->Allocated) {
199		*index = reg->Index;
200	}
201}
202
203static void alloc_input_simple(void * data, unsigned int input,
204							unsigned int hwreg)
205{
206	struct regalloc_state * s = data;
207
208	if (input >= s->NumInputs)
209		return;
210
211	s->Input[input].Allocated = 1;
212	s->Input[input].File = RC_FILE_TEMPORARY;
213	s->Input[input].Index = hwreg;
214}
215
216/* This functions offsets the temporary register indices by the number
217 * of input registers, because input registers are actually temporaries and
218 * should not occupy the same space.
219 *
220 * This pass is supposed to be used to maintain correct allocation of inputs
221 * if the standard register allocation is disabled. */
222static void do_regalloc_inputs_only(struct regalloc_state * s)
223{
224	for (unsigned i = 0; i < s->NumTemporaries; i++) {
225		s->Temporary[i].Allocated = 1;
226		s->Temporary[i].File = RC_FILE_TEMPORARY;
227		s->Temporary[i].Index = i + s->NumInputs;
228	}
229}
230
231static unsigned int is_derivative(rc_opcode op)
232{
233	return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
234}
235
236static int find_class(
237	struct rc_class * classes,
238	unsigned int writemask,
239	unsigned int max_writemask_count)
240{
241	unsigned int i;
242	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
243		unsigned int j;
244		if (classes[i].WritemaskCount > max_writemask_count) {
245			continue;
246		}
247		for (j = 0; j < 3; j++) {
248			if (classes[i].Writemasks[j] == writemask) {
249				return i;
250			}
251		}
252	}
253	return -1;
254}
255
/** User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/** Flag the callback clears when a converted swizzle is not
	 * native. */
	unsigned * can_change_writemask;
	/** Conversion swizzle applied to each source swizzle before the
	 * check. */
	unsigned conversion_swizzle;
};
260
261static void variable_get_class_read_cb(
262	void * userdata,
263	struct rc_instruction * inst,
264	struct rc_pair_instruction_arg * arg,
265	struct rc_pair_instruction_source * src)
266{
267	struct variable_get_class_cb_data * d = userdata;
268	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
269							d->conversion_swizzle);
270	if (!r300_swizzle_is_native_basic(new_swizzle)) {
271		*d->can_change_writemask = 0;
272	}
273}
274
275static enum rc_reg_class variable_get_class(
276	struct rc_variable * variable,
277	struct rc_class * classes)
278{
279	unsigned int i;
280	unsigned int can_change_writemask= 1;
281	unsigned int writemask = rc_variable_writemask_sum(variable);
282	struct rc_list * readers = rc_variable_readers_union(variable);
283	int class_index;
284
285	if (!variable->C->is_r500) {
286		struct rc_class c;
287		struct rc_variable * var_ptr;
288		/* The assumption here is that if an instruction has type
289		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
290		 * r300 and r400 can't swizzle the result of a TEX lookup. */
291		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
292			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
293				writemask = RC_MASK_XYZW;
294			}
295		}
296
297		/* Check if it is possible to do swizzle packing for r300/r400
298		 * without creating non-native swizzles. */
299		class_index = find_class(classes, writemask, 3);
300		if (class_index < 0) {
301			goto error;
302		}
303		c = classes[class_index];
304		if (c.WritemaskCount == 1) {
305			goto done;
306		}
307		for (i = 0; i < c.WritemaskCount; i++) {
308			struct rc_variable * var_ptr;
309			for (var_ptr = variable; var_ptr;
310						var_ptr = var_ptr->Friend) {
311				int j;
312				unsigned int conversion_swizzle =
313						rc_make_conversion_swizzle(
314						writemask, c.Writemasks[i]);
315				struct variable_get_class_cb_data d;
316				d.can_change_writemask = &can_change_writemask;
317				d.conversion_swizzle = conversion_swizzle;
318				/* If we get this far var_ptr->Inst has to
319				 * be a pair instruction.  If variable or any
320				 * of its friends are normal instructions,
321				 * then the writemask will be set to RC_MASK_XYZW
322				 * and the function will return before it gets
323				 * here. */
324				rc_pair_for_all_reads_arg(var_ptr->Inst,
325					variable_get_class_read_cb, &d);
326
327				for (j = 0; j < var_ptr->ReaderCount; j++) {
328					unsigned int old_swizzle;
329					unsigned int new_swizzle;
330					struct rc_reader r = var_ptr->Readers[j];
331					if (r.Inst->Type ==
332							RC_INSTRUCTION_PAIR ) {
333						old_swizzle = r.U.P.Arg->Swizzle;
334					} else {
335						old_swizzle = r.U.I.Src->Swizzle;
336					}
337					new_swizzle = rc_adjust_channels(
338						old_swizzle, conversion_swizzle);
339					if (!r300_swizzle_is_native_basic(
340								new_swizzle)) {
341						can_change_writemask = 0;
342						break;
343					}
344				}
345				if (!can_change_writemask) {
346					break;
347				}
348			}
349			if (!can_change_writemask) {
350				break;
351			}
352		}
353	}
354
355	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
356		/* DDX/DDY seem to always fail when their writemasks are
357		 * changed.*/
358		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
359		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
360			can_change_writemask = 0;
361		}
362	}
363	for ( ; readers; readers = readers->Next) {
364		struct rc_reader * r = readers->Item;
365		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
366			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
367				can_change_writemask = 0;
368				break;
369			}
370			/* DDX/DDY also fail when their swizzles are changed. */
371			if (is_derivative(r->Inst->U.P.RGB.Opcode)
372			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
373				can_change_writemask = 0;
374				break;
375			}
376		}
377	}
378
379	class_index = find_class(classes, writemask,
380						can_change_writemask ? 3 : 1);
381done:
382	if (class_index > -1) {
383		return classes[class_index].Class;
384	} else {
385error:
386		rc_error(variable->C,
387				"Could not find class for index=%u mask=%u\n",
388				variable->Dst.Index, writemask);
389		return 0;
390	}
391}
392
393static unsigned int overlap_live_intervals_array(
394	struct live_intervals * a,
395	struct live_intervals * b)
396{
397	unsigned int a_chan, b_chan;
398	for (a_chan = 0; a_chan < 4; a_chan++) {
399		for (b_chan = 0; b_chan < 4; b_chan++) {
400			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
401					return 1;
402			}
403		}
404	}
405	return 0;
406}
407
408static unsigned int reg_get_index(int reg)
409{
410	return reg / RC_MASK_XYZW;
411}
412
413static unsigned int reg_get_writemask(int reg)
414{
415	return (reg % RC_MASK_XYZW) + 1;
416}
417
418static int get_reg_id(unsigned int index, unsigned int writemask)
419{
420	assert(writemask);
421	if (writemask == 0) {
422		return 0;
423	}
424	return (index * RC_MASK_XYZW) + (writemask - 1);
425}
426
#if VERBOSE
/** Print an ra register id to stderr as "Temp[index].xyzw", with '_' in
 * place of every channel that is not part of the writemask. */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
439
440static void add_register_conflicts(
441	struct ra_regs * regs,
442	unsigned int max_temp_regs)
443{
444	unsigned int index, a_mask, b_mask;
445	for (index = 0; index < max_temp_regs; index++) {
446		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
447			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
448								b_mask++) {
449				if (a_mask & b_mask) {
450					ra_add_reg_conflict(regs,
451						get_reg_id(index, a_mask),
452						get_reg_id(index, b_mask));
453				}
454			}
455		}
456	}
457}
458
459static void do_advanced_regalloc(struct regalloc_state * s)
460{
461	struct rc_class rc_class_list [] = {
462		{RC_REG_CLASS_SINGLE, 3, 0, 0,
463			{RC_MASK_X,
464			 RC_MASK_Y,
465			 RC_MASK_Z}},
466		{RC_REG_CLASS_DOUBLE, 3, 0, 0,
467			{RC_MASK_X | RC_MASK_Y,
468			 RC_MASK_X | RC_MASK_Z,
469			 RC_MASK_Y | RC_MASK_Z}},
470		{RC_REG_CLASS_TRIPLE, 1, 0, 0,
471			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
472			 RC_MASK_NONE,
473			 RC_MASK_NONE}},
474		{RC_REG_CLASS_ALPHA, 1, 0, 0,
475			{RC_MASK_W,
476			 RC_MASK_NONE,
477			 RC_MASK_NONE}},
478		{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
479			{RC_MASK_X | RC_MASK_W,
480			 RC_MASK_Y | RC_MASK_W,
481			 RC_MASK_Z | RC_MASK_W}},
482		{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
483			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
484			 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
485			 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
486		{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
487			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
488			RC_MASK_NONE,
489			RC_MASK_NONE}},
490		{RC_REG_CLASS_X, 1, 0, 0,
491			{RC_MASK_X,
492			RC_MASK_NONE,
493			RC_MASK_NONE}},
494		{RC_REG_CLASS_Y, 1, 0, 0,
495			{RC_MASK_Y,
496			RC_MASK_NONE,
497			RC_MASK_NONE}},
498		{RC_REG_CLASS_Z, 1, 0, 0,
499			{RC_MASK_Z,
500			RC_MASK_NONE,
501			RC_MASK_NONE}},
502		{RC_REG_CLASS_XY, 1, 0, 0,
503			{RC_MASK_X | RC_MASK_Y,
504			RC_MASK_NONE,
505			RC_MASK_NONE}},
506		{RC_REG_CLASS_YZ, 1, 0, 0,
507			{RC_MASK_Y | RC_MASK_Z,
508			RC_MASK_NONE,
509			RC_MASK_NONE}},
510		{RC_REG_CLASS_XZ, 1, 0, 0,
511			{RC_MASK_X | RC_MASK_Z,
512			RC_MASK_NONE,
513			RC_MASK_NONE}},
514		{RC_REG_CLASS_XW, 1, 0, 0,
515			{RC_MASK_X | RC_MASK_W,
516			RC_MASK_NONE,
517			RC_MASK_NONE}},
518		{RC_REG_CLASS_YW, 1, 0, 0,
519			{RC_MASK_Y | RC_MASK_W,
520			RC_MASK_NONE,
521			RC_MASK_NONE}},
522		{RC_REG_CLASS_ZW, 1, 0, 0,
523			{RC_MASK_Z | RC_MASK_W,
524			RC_MASK_NONE,
525			RC_MASK_NONE}},
526		{RC_REG_CLASS_XYW, 1, 0, 0,
527			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
528			RC_MASK_NONE,
529			RC_MASK_NONE}},
530		{RC_REG_CLASS_YZW, 1, 0, 0,
531			{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
532			RC_MASK_NONE,
533			RC_MASK_NONE}},
534		{RC_REG_CLASS_XZW, 1, 0, 0,
535			{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
536			RC_MASK_NONE,
537			RC_MASK_NONE}}
538	};
539
540	unsigned int i, j, index, input_node, node_count, node_index;
541	unsigned int * node_classes;
542	unsigned int * input_classes;
543	struct rc_instruction * inst;
544	struct rc_list * var_ptr;
545	struct rc_list * variables;
546	struct ra_regs * regs;
547	struct ra_graph * graph;
548
549	/* Allocate the main ra data structure */
550	regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW);
551
552	/* Get list of program variables */
553	variables = rc_get_variables(s->C);
554	node_count = rc_list_count(variables);
555	node_classes = memory_pool_malloc(&s->C->Pool,
556			node_count * sizeof(unsigned int));
557	input_classes = memory_pool_malloc(&s->C->Pool,
558			s->NumInputs * sizeof(unsigned int));
559
560	for (var_ptr = variables, node_index = 0; var_ptr;
561					var_ptr = var_ptr->Next, node_index++) {
562		unsigned int class_index;
563		/* Compute the live intervals */
564		rc_variable_compute_live_intervals(var_ptr->Item);
565
566		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
567
568		/* If we haven't used this register class yet, mark it
569		 * as used and allocate space for it. */
570		if (!rc_class_list[class_index].Used) {
571			rc_class_list[class_index].Used = 1;
572			rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
573		}
574
575		node_classes[node_index] = rc_class_list[class_index].Id;
576	}
577
578
579	/* Assign registers to the classes */
580	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
581		struct rc_class class = rc_class_list[i];
582		if (!class.Used) {
583			continue;
584		}
585
586		for (index = 0; index < s->C->max_temp_regs; index++) {
587			for (j = 0; j < class.WritemaskCount; j++) {
588				int reg_id = get_reg_id(index,
589							class.Writemasks[j]);
590				ra_class_add_reg(regs, class.Id, reg_id);
591			}
592		}
593	}
594
595	/* Add register conflicts */
596	add_register_conflicts(regs, s->C->max_temp_regs);
597
598	/* Calculate live intervals for input registers */
599	for (inst = s->C->Program.Instructions.Next;
600					inst != &s->C->Program.Instructions;
601					inst = inst->Next) {
602		rc_opcode op = rc_get_flow_control_inst(inst);
603		if (op == RC_OPCODE_BGNLOOP) {
604			struct rc_instruction * endloop =
605							rc_match_bgnloop(inst);
606			if (endloop->IP > s->LoopEnd) {
607				s->LoopEnd = endloop->IP;
608			}
609		}
610		rc_for_all_reads_mask(inst, scan_read_callback, s);
611	}
612
613	/* Create classes for input registers */
614	for (i = 0; i < s->NumInputs; i++) {
615		unsigned int chan, class_id, writemask = 0;
616		for (chan = 0; chan < 4; chan++) {
617			if (s->Input[i].Live[chan].Used) {
618				writemask |= (1 << chan);
619			}
620		}
621		s->Input[i].Writemask = writemask;
622		if (!writemask) {
623			continue;
624		}
625
626		class_id = ra_alloc_reg_class(regs);
627		input_classes[i] = class_id;
628		ra_class_add_reg(regs, class_id,
629				get_reg_id(s->Input[i].Index, writemask));
630	}
631
632	ra_set_finalize(regs);
633
634	graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
635
636	/* Build the interference graph */
637	for (var_ptr = variables, node_index = 0; var_ptr;
638					var_ptr = var_ptr->Next,node_index++) {
639		struct rc_list * a, * b;
640		unsigned int b_index;
641
642		ra_set_node_class(graph, node_index, node_classes[node_index]);
643
644		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
645						b; b = b->Next, b_index++) {
646			struct rc_variable * var_a = a->Item;
647			while (var_a) {
648				struct rc_variable * var_b = b->Item;
649				while (var_b) {
650					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
651						ra_add_node_interference(graph,
652							node_index, b_index);
653					}
654					var_b = var_b->Friend;
655				}
656				var_a = var_a->Friend;
657			}
658		}
659	}
660
661	/* Add input registers to the interference graph */
662	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
663		if (!s->Input[i].Writemask) {
664			continue;
665		}
666		ra_set_node_class(graph, node_count + input_node,
667							input_classes[i]);
668		for (var_ptr = variables, node_index = 0;
669				var_ptr; var_ptr = var_ptr->Next, node_index++) {
670			struct rc_variable * var = var_ptr->Item;
671			if (overlap_live_intervals_array(s->Input[i].Live,
672								var->Live)) {
673				ra_add_node_interference(graph, node_index,
674						node_count + input_node);
675			}
676		}
677		/* Manually allocate a register for this input */
678		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
679				s->Input[i].Index, s->Input[i].Writemask));
680		input_node++;
681	}
682
683	if (!ra_allocate_no_spills(graph)) {
684		rc_error(s->C, "Ran out of hardware temporaries\n");
685		return;
686	}
687
688	/* Rewrite the registers */
689	for (var_ptr = variables, node_index = 0; var_ptr;
690				var_ptr = var_ptr->Next, node_index++) {
691		int reg = ra_get_node_reg(graph, node_index);
692		unsigned int writemask = reg_get_writemask(reg);
693		unsigned int index = reg_get_index(reg);
694		struct rc_variable * var = var_ptr->Item;
695
696		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
697			writemask = rc_variable_writemask_sum(var);
698		}
699
700		if (var->Dst.File == RC_FILE_INPUT) {
701			continue;
702		}
703		rc_variable_change_dst(var, index, writemask);
704	}
705
706	ralloc_free(graph);
707	ralloc_free(regs);
708}
709
710/**
711 * @param user This parameter should be a pointer to an integer value.  If this
712 * integer value is zero, then a simple register allocator will be used that
713 * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
714 * user is non-zero, then the regular register allocator will be used
715 * (\sa do_regalloc).
716  */
717void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
718{
719	struct r300_fragment_program_compiler *c =
720				(struct r300_fragment_program_compiler*)cc;
721	struct regalloc_state s;
722	int * do_full_regalloc = (int*)user;
723
724	memset(&s, 0, sizeof(s));
725	s.C = cc;
726	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
727	s.Input = memory_pool_malloc(&cc->Pool,
728			s.NumInputs * sizeof(struct register_info));
729	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
730
731	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
732	s.Temporary = memory_pool_malloc(&cc->Pool,
733			s.NumTemporaries * sizeof(struct register_info));
734	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
735
736	rc_recompute_ips(s.C);
737
738	c->AllocateHwInputs(c, &alloc_input_simple, &s);
739	if (*do_full_regalloc) {
740		do_advanced_regalloc(&s);
741	} else {
742		s.Simple = 1;
743		do_regalloc_inputs_only(&s);
744	}
745
746	/* Rewrite inputs and if we are doing the simple allocation, rewrite
747	 * temporaries too. */
748	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
749					inst != &s.C->Program.Instructions;
750					inst = inst->Next) {
751		rc_remap_registers(inst, &remap_register, &s);
752	}
753}
754