1/*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28#include "radeon_program_pair.h"
29
30#include <stdio.h>
31
32#include "radeon_compiler.h"
33#include "radeon_compiler_util.h"
34#include "radeon_dataflow.h"
35#include "radeon_list.h"
36#include "radeon_variable.h"
37
38#include "util/u_debug.h"
39
40#define VERBOSE 0
41
42#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
43
44struct schedule_instruction {
45	struct rc_instruction * Instruction;
46
47	/** Next instruction in the linked list of ready instructions. */
48	struct schedule_instruction *NextReady;
49
50	/** Values that this instruction reads and writes */
51	struct reg_value * WriteValues[4];
52	struct reg_value * ReadValues[12];
53	unsigned int NumWriteValues:3;
54	unsigned int NumReadValues:4;
55
56	/**
57	 * Number of (read and write) dependencies that must be resolved before
58	 * this instruction can be scheduled.
59	 */
60	unsigned int NumDependencies:5;
61
62	/** List of all readers (see rc_get_readers() for the definition of
63	 * "all readers"), even those outside the basic block this instruction
64	 * lives in. */
65	struct rc_reader_data GlobalReaders;
66
67	/** If the scheduler has paired an RGB and an Alpha instruction together,
68	 * PairedInst references the alpha insturction's dependency information.
69	 */
70	struct schedule_instruction * PairedInst;
71
72	/** This scheduler uses the value of Score to determine which
73	 * instruction to schedule.  Instructions with a higher value of Score
74	 * will be scheduled first. */
75	int Score;
76
77	/** The number of components that read from a TEX instruction. */
78	unsigned TexReadCount;
79
80	/** For TEX instructions a list of readers */
81	struct rc_list * TexReaders;
82};
83
84
/**
 * Singly linked list node recording one instruction that reads a value.
 */
struct reg_value_reader {
	/** The instruction doing the read. */
	struct schedule_instruction *Reader;
	/** Following node of the reader list, or NULL at the end. */
	struct reg_value_reader *Next;
};
92
/**
 * Tracks one value stored in a single component of an
 * RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** The instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions reading this value.
	 * Once the writer has been committed, the dependency count of each
	 * of these readers is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers that have not been committed yet; it is
	 * decremented each time a reader of the value is committed.
	 * When it reaches zero, the dependency count of the instruction
	 * writing \ref Next is decreased.
	 */
	unsigned int NumReaders;

	/** The next value to be written to the same register component. */
	struct reg_value *Next;
};
117
/**
 * Value tracking for one register; get_reg_valuep() indexes Values by
 * channel.
 */
struct register_state {
	struct reg_value *Values[4];
};
121
122struct remap_reg {
123	struct rc_instruciont * Inst;
124	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125	unsigned int OldSwizzle:3;
126	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127	unsigned int NewSwizzle:3;
128	unsigned int OnlyTexReads:1;
129	struct remap_reg * Next;
130};
131
132struct schedule_state {
133	struct radeon_compiler * C;
134	struct schedule_instruction * Current;
135	/** Array of the previous writers of Current's destination register
136	 * indexed by channel. */
137	struct schedule_instruction * PrevWriter[4];
138
139	struct register_state Temporary[RC_REGISTER_MAX_INDEX];
140
141	/**
142	 * Linked lists of instructions that can be scheduled right now,
143	 * based on which ALU/TEX resources they require.
144	 */
145	/*@{*/
146	struct schedule_instruction *ReadyFullALU;
147	struct schedule_instruction *ReadyRGB;
148	struct schedule_instruction *ReadyAlpha;
149	struct schedule_instruction *ReadyTEX;
150	/*@}*/
151	struct rc_list *PendingTEX;
152
153	void (*CalcScore)(struct schedule_instruction *);
154	long max_tex_group;
155	unsigned PrevBlockHasTex:1;
156	unsigned TEXCount;
157	unsigned Opt:1;
158};
159
160static struct reg_value ** get_reg_valuep(struct schedule_state * s,
161		rc_register_file file, unsigned int index, unsigned int chan)
162{
163	if (file != RC_FILE_TEMPORARY)
164		return 0;
165
166	if (index >= RC_REGISTER_MAX_INDEX) {
167		rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
168		return 0;
169	}
170
171	return &s->Temporary[index].Values[chan];
172}
173
174static unsigned get_tex_read_count(struct schedule_instruction * sinst)
175{
176	unsigned tex_read_count = sinst->TexReadCount;
177	if (sinst->PairedInst) {
178		tex_read_count += sinst->PairedInst->TexReadCount;
179	}
180	return tex_read_count;
181}
182
#if VERBOSE
/**
 * Debug helper: print every entry of a ready list to stderr as
 * "IP (score) [tex read count],".
 *
 * @param sinst head of the list; may be NULL
 */
static void print_list(struct schedule_instruction * sinst)
{
	struct schedule_instruction * ptr;
	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
		/* Print the score of the entry itself, not of the list head,
		 * and keep it signed to match the %d conversion. */
		int score = ptr->Score;
		unsigned tex_read_count = get_tex_read_count(ptr);
		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
						tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
196
197static void remove_inst_from_list(struct schedule_instruction ** list,
198					struct schedule_instruction * inst)
199{
200	struct schedule_instruction * prev = NULL;
201	struct schedule_instruction * list_ptr;
202	for (list_ptr = *list; list_ptr; prev = list_ptr,
203					list_ptr = list_ptr->NextReady) {
204		if (list_ptr == inst) {
205			if (prev) {
206				prev->NextReady = inst->NextReady;
207			} else {
208				*list = inst->NextReady;
209			}
210			inst->NextReady = NULL;
211			break;
212		}
213	}
214}
215
216static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
217{
218	inst->NextReady = *list;
219	*list = inst;
220}
221
222static void add_inst_to_list_score(struct schedule_instruction ** list,
223					struct schedule_instruction * inst)
224{
225	struct schedule_instruction * temp;
226	struct schedule_instruction * prev;
227	if (!*list) {
228		*list = inst;
229		return;
230	}
231	temp = *list;
232	prev = NULL;
233	while(temp && inst->Score <= temp->Score) {
234		prev = temp;
235		temp = temp->NextReady;
236	}
237
238	if (!prev) {
239		inst->NextReady = temp;
240		*list = inst;
241	} else {
242		prev->NextReady = inst;
243		inst->NextReady = temp;
244	}
245}
246
247static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
248{
249	DBG("%i is now ready\n", sinst->Instruction->IP);
250
251	/* Adding Ready TEX instructions to the end of the "Ready List" helps
252	 * us emit TEX instructions in blocks without losing our place. */
253	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
254		add_inst_to_list_score(&s->ReadyTEX, sinst);
255	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
256		add_inst_to_list_score(&s->ReadyRGB, sinst);
257	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
258		add_inst_to_list_score(&s->ReadyAlpha, sinst);
259	else
260		add_inst_to_list_score(&s->ReadyFullALU, sinst);
261}
262
263static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
264{
265	assert(sinst->NumDependencies > 0);
266	sinst->NumDependencies--;
267	if (!sinst->NumDependencies)
268		instruction_ready(s, sinst);
269}
270
271/* These functions provide different heuristics for scheduling instructions.
272 * The default is calc_score_readers. */
273
#if 0

/* Heuristic: every instruction gets the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}

/* Heuristic: score an instruction by the outstanding dependencies of the
 * readers of its results; a reader with a single dependency left adds an
 * extra 100. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int w;

	sinst->Score = 0;
	for (w = 0; w < sinst->NumWriteValues; w++) {
		struct reg_value *val = sinst->WriteValues[w];
		struct reg_value_reader *rd;

		if (!val->NumReaders)
			continue;

		for (rd = val->Readers; rd; rd = rd->Next) {
			if (rd->Reader->NumDependencies == 1)
				sinst->Score += 100;
			sinst->Score += rd->Reader->NumDependencies;
		}
	}
}

#endif
300
301#define NO_OUTPUT_SCORE (1 << 24)
302
303static void score_no_output(struct schedule_instruction * sinst)
304{
305	assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
306	if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
307			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
308		if (sinst->PairedInst) {
309			if (!sinst->PairedInst->Instruction->U.P.
310							RGB.OutputWriteMask
311					&& !sinst->PairedInst->Instruction->U.P.
312							Alpha.OutputWriteMask) {
313				sinst->Score |= NO_OUTPUT_SCORE;
314			}
315
316		} else {
317			sinst->Score |= NO_OUTPUT_SCORE;
318		}
319	}
320}
321
322#define PAIRED_SCORE (1 << 16)
323
324static void calc_score_r300(struct schedule_instruction * sinst)
325{
326	unsigned src_idx;
327
328	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
329		sinst->Score = 0;
330		return;
331	}
332
333	score_no_output(sinst);
334
335	if (sinst->PairedInst) {
336		sinst->Score |= PAIRED_SCORE;
337		return;
338	}
339
340	for (src_idx = 0; src_idx < 4; src_idx++) {
341		sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
342				sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
343	}
344}
345
346#define NO_READ_TEX_SCORE (1 << 16)
347
348static void calc_score_readers(struct schedule_instruction * sinst)
349{
350	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
351		sinst->Score = 0;
352	} else {
353		sinst->Score = sinst->NumReadValues;
354		if (sinst->PairedInst) {
355			sinst->Score += sinst->PairedInst->NumReadValues;
356		}
357		if (get_tex_read_count(sinst) == 0) {
358			sinst->Score |= NO_READ_TEX_SCORE;
359		}
360		score_no_output(sinst);
361	}
362}
363
364/**
365 * This function decreases the dependencies of the next instruction that
366 * wants to write to each of sinst's read values.
367 */
368static void commit_update_reads(struct schedule_state * s,
369					struct schedule_instruction * sinst){
370	unsigned int i;
371	for(i = 0; i < sinst->NumReadValues; ++i) {
372		struct reg_value * v = sinst->ReadValues[i];
373		assert(v->NumReaders > 0);
374		v->NumReaders--;
375		if (!v->NumReaders) {
376			if (v->Next) {
377				decrease_dependencies(s, v->Next->Writer);
378			}
379		}
380	}
381	if (sinst->PairedInst) {
382		commit_update_reads(s, sinst->PairedInst);
383	}
384}
385
386static void commit_update_writes(struct schedule_state * s,
387					struct schedule_instruction * sinst){
388	unsigned int i;
389	for(i = 0; i < sinst->NumWriteValues; ++i) {
390		struct reg_value * v = sinst->WriteValues[i];
391		if (v->NumReaders) {
392			for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
393				decrease_dependencies(s, r->Reader);
394			}
395		} else {
396			/* This happens in instruction sequences of the type
397			 *  OP r.x, ...;
398			 *  OP r.x, r.x, ...;
399			 * See also the subtlety in how instructions that both
400			 * read and write the same register are scanned.
401			 */
402			if (v->Next)
403				decrease_dependencies(s, v->Next->Writer);
404		}
405	}
406	if (sinst->PairedInst) {
407		commit_update_writes(s, sinst->PairedInst);
408	}
409}
410
411static void notify_sem_wait(struct schedule_state *s)
412{
413	struct rc_list * pend_ptr;
414	for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
415		struct rc_list * read_ptr;
416		struct schedule_instruction * pending = pend_ptr->Item;
417		for (read_ptr = pending->TexReaders; read_ptr;
418						read_ptr = read_ptr->Next) {
419			struct schedule_instruction * reader = read_ptr->Item;
420			reader->TexReadCount--;
421		}
422	}
423	s->PendingTEX = NULL;
424}
425
426static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
427{
428	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
429
430	commit_update_reads(s, sinst);
431
432	commit_update_writes(s, sinst);
433
434	if (get_tex_read_count(sinst) > 0) {
435		sinst->Instruction->U.P.SemWait = 1;
436		notify_sem_wait(s);
437	}
438}
439
440/**
441 * Emit all ready texture instructions in a single block.
442 *
443 * Emit as a single block to (hopefully) sample many textures in parallel,
444 * and to avoid hardware indirections on R300.
445 */
446static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
447{
448	struct schedule_instruction *readytex;
449	struct rc_instruction * inst_begin;
450
451	assert(s->ReadyTEX);
452	notify_sem_wait(s);
453
454	/* Node marker for R300 */
455	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
456	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
457
458	/* Link texture instructions back in */
459	readytex = s->ReadyTEX;
460	while(readytex) {
461		rc_insert_instruction(before->Prev, readytex->Instruction);
462		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
463
464		/* All of the TEX instructions in the same TEX block have
465		 * their source registers read from before any of the
466		 * instructions in that block write to their destination
467		 * registers.  This means that when we commit a TEX
468		 * instruction, any other TEX instruction that wants to write
469		 * to one of the committed instruction's source register can be
470		 * marked as ready and should be emitted in the same TEX
471		 * block. This prevents the following sequence from being
472		 * emitted in two different TEX blocks:
473		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
474		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
475		 */
476		commit_update_reads(s, readytex);
477		readytex = readytex->NextReady;
478	}
479	readytex = s->ReadyTEX;
480	s->ReadyTEX = 0;
481	while(readytex){
482		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
483		commit_update_writes(s, readytex);
484		/* Set semaphore bits for last TEX instruction in the block */
485		if (!readytex->NextReady) {
486			readytex->Instruction->U.I.TexSemAcquire = 1;
487			readytex->Instruction->U.I.TexSemWait = 1;
488		}
489		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
490		readytex = readytex->NextReady;
491	}
492}
493
494/* This is a helper function for destructive_merge_instructions().  It helps
495 * merge presubtract sources from two instructions and makes sure the
496 * presubtract sources end up in the correct spot.  This function assumes that
497 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
498 * but no scalar instruction (alpha).
499 * @return 0 if merging the presubtract sources fails.
500 * @retrun 1 if merging the presubtract sources succeeds.
501 */
502static int merge_presub_sources(
503	struct rc_pair_instruction * dst_full,
504	struct rc_pair_sub_instruction src,
505	unsigned int type)
506{
507	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
508	struct rc_pair_sub_instruction * dst_sub;
509	const struct rc_opcode_info * info;
510
511	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
512
513	switch(type) {
514	case RC_SOURCE_RGB:
515		is_rgb = 1;
516		is_alpha = 0;
517		dst_sub = &dst_full->RGB;
518		break;
519	case RC_SOURCE_ALPHA:
520		is_rgb = 0;
521		is_alpha = 1;
522		dst_sub = &dst_full->Alpha;
523		break;
524	default:
525		assert(0);
526		return 0;
527	}
528
529	info = rc_get_opcode_info(dst_full->RGB.Opcode);
530
531	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
532		return 0;
533
534	srcp_regs = rc_presubtract_src_reg_count(
535					src.Src[RC_PAIR_PRESUB_SRC].Index);
536	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
537		unsigned int arg;
538		int free_source;
539		unsigned int one_way = 0;
540		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
541		struct rc_pair_instruction_source temp;
542
543		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
544							srcp.File, srcp.Index);
545
546		/* If free_source < 0 then there are no free source
547		 * slots. */
548		if (free_source < 0)
549			return 0;
550
551		temp = dst_sub->Src[srcp_src];
552		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
553
554		/* srcp needs src0 and src1 to be the same */
555		if (free_source < srcp_src) {
556			if (!temp.Used)
557				continue;
558			free_source = rc_pair_alloc_source(dst_full, is_rgb,
559					is_alpha, temp.File, temp.Index);
560			if (free_source < 0)
561				return 0;
562			one_way = 1;
563		} else {
564			dst_sub->Src[free_source] = temp;
565		}
566
567		/* If free_source == srcp_src, then the presubtract
568		 * source is already in the correct place. */
569		if (free_source == srcp_src)
570			continue;
571
572		/* Shuffle the sources, so we can put the
573		 * presubtract source in the correct place. */
574		for(arg = 0; arg < info->NumSrcRegs; arg++) {
575			/*If this arg does not read from an rgb source,
576			 * do nothing. */
577			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
578								& type)) {
579				continue;
580			}
581
582			if (dst_full->RGB.Arg[arg].Source == srcp_src)
583				dst_full->RGB.Arg[arg].Source = free_source;
584			/* We need to do this just in case register
585			 * is one of the sources already, but in the
586			 * wrong spot. */
587			else if(dst_full->RGB.Arg[arg].Source == free_source
588							&& !one_way) {
589				dst_full->RGB.Arg[arg].Source = srcp_src;
590			}
591		}
592	}
593	return 1;
594}
595
596
597/* This function assumes that rgb.Alpha and alpha.RGB are unused */
598static int destructive_merge_instructions(
599		struct rc_pair_instruction * rgb,
600		struct rc_pair_instruction * alpha)
601{
602	const struct rc_opcode_info * opcode;
603
604	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
605	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
606
607	/* Presubtract registers need to be merged first so that registers
608	 * needed by the presubtract operation can be placed in src0 and/or
609	 * src1. */
610
611	/* Merge the rgb presubtract registers. */
612	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
613		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
614			return 0;
615		}
616	}
617	/* Merge the alpha presubtract registers */
618	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
619		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
620			return 0;
621		}
622	}
623
624	/* Copy alpha args into rgb */
625	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
626
627	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
628		unsigned int srcrgb = 0;
629		unsigned int srcalpha = 0;
630		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
631		rc_register_file file = 0;
632		unsigned int index = 0;
633		int source;
634
635		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
636			srcrgb = 1;
637			file = alpha->RGB.Src[oldsrc].File;
638			index = alpha->RGB.Src[oldsrc].Index;
639		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
640			srcalpha = 1;
641			file = alpha->Alpha.Src[oldsrc].File;
642			index = alpha->Alpha.Src[oldsrc].Index;
643		}
644
645		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
646		if (source < 0)
647			return 0;
648
649		rgb->Alpha.Arg[arg].Source = source;
650		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
651		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
652		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
653	}
654
655	/* Copy alpha opcode into rgb */
656	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
657	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
658	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
659	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
660	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
661	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
662	rgb->Alpha.Omod = alpha->Alpha.Omod;
663
664	/* Merge ALU result writing */
665	if (alpha->WriteALUResult) {
666		if (rgb->WriteALUResult)
667			return 0;
668
669		rgb->WriteALUResult = alpha->WriteALUResult;
670		rgb->ALUResultCompare = alpha->ALUResultCompare;
671	}
672
673	/* Copy SemWait */
674	rgb->SemWait |= alpha->SemWait;
675
676	return 1;
677}
678
679/**
680 * Try to merge the given instructions into the rgb instructions.
681 *
682 * Return true on success; on failure, return false, and keep
683 * the instructions untouched.
684 */
685static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
686{
687	struct rc_pair_instruction backup;
688
689	/*Instructions can't write output registers and ALU result at the
690	 * same time. */
691	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
692		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
693		return 0;
694	}
695
696	/* Writing output registers in the middle of shaders is slow, so
697	 * we don't want to pair output writes with temp writes. */
698	if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
699		|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
700		return 0;
701	}
702
703	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
704
705	if (destructive_merge_instructions(rgb, alpha))
706		return 1;
707
708	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
709	return 0;
710}
711
712static void presub_nop(struct rc_instruction * emitted) {
713	int prev_rgb_index, prev_alpha_index, i, num_src;
714
715	/* We don't need a nop if the previous instruction is a TEX. */
716	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
717		return;
718	}
719	if (emitted->Prev->U.P.RGB.WriteMask)
720		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
721	else
722		prev_rgb_index = -1;
723	if (emitted->Prev->U.P.Alpha.WriteMask)
724		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
725	else
726		prev_alpha_index = 1;
727
728	/* Check the previous rgb instruction */
729	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
730		num_src = rc_presubtract_src_reg_count(
731				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
732		for (i = 0; i < num_src; i++) {
733			unsigned int index = emitted->U.P.RGB.Src[i].Index;
734			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
735			    && (index  == prev_rgb_index
736				|| index == prev_alpha_index)) {
737				emitted->Prev->U.P.Nop = 1;
738				return;
739			}
740		}
741	}
742
743	/* Check the previous alpha instruction. */
744	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
745		return;
746
747	num_src = rc_presubtract_src_reg_count(
748				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
749	for (i = 0; i < num_src; i++) {
750		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
751		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
752		   && (index == prev_rgb_index || index == prev_alpha_index)) {
753			emitted->Prev->U.P.Nop = 1;
754			return;
755		}
756	}
757}
758
759static void rgb_to_alpha_remap (
760	struct rc_instruction * inst,
761	struct rc_pair_instruction_arg * arg,
762	rc_register_file old_file,
763	rc_swizzle old_swz,
764	unsigned int new_index)
765{
766	int new_src_index;
767	unsigned int i;
768
769	for (i = 0; i < 3; i++) {
770		if (get_swz(arg->Swizzle, i) == old_swz) {
771			SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
772		}
773	}
774	new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
775							old_file, new_index);
776	/* This conversion is not possible, we must have made a mistake in
777	 * is_rgb_to_alpha_possible. */
778	if (new_src_index < 0) {
779		assert(0);
780		return;
781	}
782
783	arg->Source = new_src_index;
784}
785
786static int can_remap(unsigned int opcode)
787{
788	switch(opcode) {
789	case RC_OPCODE_DDX:
790	case RC_OPCODE_DDY:
791		return 0;
792	default:
793		return 1;
794	}
795}
796
797static int can_convert_opcode_to_alpha(unsigned int opcode)
798{
799	switch(opcode) {
800	case RC_OPCODE_DDX:
801	case RC_OPCODE_DDY:
802	case RC_OPCODE_DP2:
803	case RC_OPCODE_DP3:
804	case RC_OPCODE_DP4:
805	case RC_OPCODE_DPH:
806		return 0;
807	default:
808		return 1;
809	}
810}
811
812static void is_rgb_to_alpha_possible(
813	void * userdata,
814	struct rc_instruction * inst,
815	struct rc_pair_instruction_arg * arg,
816	struct rc_pair_instruction_source * src)
817{
818	unsigned int read_chan = RC_SWIZZLE_UNUSED;
819	unsigned int alpha_sources = 0;
820	unsigned int i;
821	struct rc_reader_data * reader_data = userdata;
822
823	if (!can_remap(inst->U.P.RGB.Opcode)
824	    || !can_remap(inst->U.P.Alpha.Opcode)) {
825		reader_data->Abort = 1;
826		return;
827	}
828
829	if (!src)
830		return;
831
832	/* XXX There are some cases where we can still do the conversion if
833	 * a reader reads from a presubtract source, but for now we'll prevent
834	 * it. */
835	if (arg->Source == RC_PAIR_PRESUB_SRC) {
836		reader_data->Abort = 1;
837		return;
838	}
839
840	/* Make sure the source only reads the register component that we
841	 * are going to be convering from.  It is OK if the instruction uses
842	 * this component more than once.
843	 * XXX If the index we will be converting to is the same as the
844	 * current index, then it is OK to read from more than one component.
845	 */
846	for (i = 0; i < 3; i++) {
847		rc_swizzle swz = get_swz(arg->Swizzle, i);
848		switch(swz) {
849		case RC_SWIZZLE_X:
850		case RC_SWIZZLE_Y:
851		case RC_SWIZZLE_Z:
852		case RC_SWIZZLE_W:
853			if (read_chan == RC_SWIZZLE_UNUSED) {
854				read_chan = swz;
855			} else if (read_chan != swz) {
856				reader_data->Abort = 1;
857				return;
858			}
859			break;
860		default:
861			break;
862		}
863	}
864
865	/* Make sure there are enough alpha sources.
866	 * XXX If we know what register all the readers are going
867	 * to be remapped to, then in some situations we can still do
868	 * the subsitution, even if all 3 alpha sources are being used.*/
869	for (i = 0; i < 3; i++) {
870		if (inst->U.P.Alpha.Src[i].Used) {
871			alpha_sources++;
872		}
873	}
874	if (alpha_sources > 2) {
875		reader_data->Abort = 1;
876		return;
877	}
878}
879
880static int convert_rgb_to_alpha(
881	struct schedule_state * s,
882	struct schedule_instruction * sched_inst)
883{
884	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
885	unsigned int old_mask = pair_inst->RGB.WriteMask;
886	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
887	const struct rc_opcode_info * info =
888				rc_get_opcode_info(pair_inst->RGB.Opcode);
889	int new_index = -1;
890	unsigned int i;
891
892	if (sched_inst->GlobalReaders.Abort)
893		return 0;
894
895	if (!pair_inst->RGB.WriteMask)
896		return 0;
897
898	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
899	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
900		return 0;
901	}
902
903	assert(sched_inst->NumWriteValues == 1);
904
905	if (!sched_inst->WriteValues[0]) {
906		assert(0);
907		return 0;
908	}
909
910	/* We start at the old index, because if we can reuse the same
911	 * register and just change the swizzle then it is more likely we
912	 * will be able to convert all the readers. */
913	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
914		struct reg_value ** new_regvalp = get_reg_valuep(
915						s, RC_FILE_TEMPORARY, i, 3);
916		if (!*new_regvalp) {
917			struct reg_value ** old_regvalp =
918				get_reg_valuep(s,
919					RC_FILE_TEMPORARY,
920					pair_inst->RGB.DestIndex,
921					rc_mask_to_swizzle(old_mask));
922			new_index = i;
923			*new_regvalp = *old_regvalp;
924			*old_regvalp = NULL;
925			new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
926			break;
927		}
928	}
929	if (new_index < 0) {
930		return 0;
931	}
932
933	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
934	 * as the RGB opcode, then the Alpha instruction will already contain
935	 * the correct opcode and instruction args, so we do not want to
936	 * overwrite them.
937	 */
938	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
939		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
940		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
941						sizeof(pair_inst->Alpha.Arg));
942	}
943	pair_inst->Alpha.DestIndex = new_index;
944	pair_inst->Alpha.WriteMask = RC_MASK_W;
945	pair_inst->Alpha.Target = pair_inst->RGB.Target;
946	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
947	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
948	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
949	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
950	/* Move the swizzles into the first chan */
951	for (i = 0; i < info->NumSrcRegs; i++) {
952		unsigned int j;
953		for (j = 0; j < 3; j++) {
954			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
955			if (swz != RC_SWIZZLE_UNUSED) {
956				pair_inst->Alpha.Arg[i].Swizzle =
957							rc_init_swizzle(swz, 1);
958				break;
959			}
960		}
961	}
962	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
963	pair_inst->RGB.DestIndex = 0;
964	pair_inst->RGB.WriteMask = 0;
965	pair_inst->RGB.Target = 0;
966	pair_inst->RGB.OutputWriteMask = 0;
967	pair_inst->RGB.DepthWriteMask = 0;
968	pair_inst->RGB.Saturate = 0;
969	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
970
971	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
972		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
973		rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
974					RC_FILE_TEMPORARY, old_swz, new_index);
975	}
976	return 1;
977}
978
979static void try_convert_and_pair(
980	struct schedule_state *s,
981	struct schedule_instruction ** inst_list)
982{
983	struct schedule_instruction * list_ptr = *inst_list;
984	while (list_ptr && *inst_list && (*inst_list)->NextReady) {
985		int paired = 0;
986		if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
987			&& list_ptr->Instruction->U.P.RGB.Opcode
988						!= RC_OPCODE_REPL_ALPHA) {
989				goto next;
990		}
991		if (list_ptr->NumWriteValues == 1
992					&& convert_rgb_to_alpha(s, list_ptr)) {
993
994			struct schedule_instruction * pair_ptr;
995			remove_inst_from_list(inst_list, list_ptr);
996			add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
997
998			for (pair_ptr = s->ReadyRGB; pair_ptr;
999					pair_ptr = pair_ptr->NextReady) {
1000				if (merge_instructions(&pair_ptr->Instruction->U.P,
1001						&list_ptr->Instruction->U.P)) {
1002					remove_inst_from_list(&s->ReadyAlpha, list_ptr);
1003					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
1004					pair_ptr->PairedInst = list_ptr;
1005
1006					add_inst_to_list(&s->ReadyFullALU, pair_ptr);
1007					list_ptr = *inst_list;
1008					paired = 1;
1009					break;
1010				}
1011
1012			}
1013		}
1014		if (!paired) {
1015next:
1016			list_ptr = list_ptr->NextReady;
1017		}
1018	}
1019}
1020
1021/**
1022 * This function attempts to merge RGB and Alpha instructions together.
1023 */
1024static void pair_instructions(struct schedule_state * s)
1025{
1026	struct schedule_instruction *rgb_ptr;
1027	struct schedule_instruction *alpha_ptr;
1028
1029	/* Some pairings might fail because they require too
1030	 * many source slots; try all possible pairings if necessary */
1031	rgb_ptr = s->ReadyRGB;
1032	while(rgb_ptr) {
1033		struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1034		alpha_ptr = s->ReadyAlpha;
1035		while(alpha_ptr) {
1036			struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1037			if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1038				/* Remove RGB and Alpha from their ready lists.
1039				 */
1040				remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1041				remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1042				rgb_ptr->PairedInst = alpha_ptr;
1043				add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1044				break;
1045			}
1046			alpha_ptr = alpha_next;
1047		}
1048		rgb_ptr = rgb_next;
1049	}
1050
1051	if (!s->Opt) {
1052		return;
1053	}
1054
1055	/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1056	 * slot can be converted into Alpha instructions. */
1057	try_convert_and_pair(s, &s->ReadyFullALU);
1058
1059	/* Try to convert some of the RGB instructions to Alpha and
1060	 * try to pair it with another RGB. */
1061	try_convert_and_pair(s, &s->ReadyRGB);
1062}
1063
1064static void update_max_score(
1065	struct schedule_state * s,
1066	struct schedule_instruction ** list,
1067	int * max_score,
1068	struct schedule_instruction ** max_inst_out,
1069	struct schedule_instruction *** list_out)
1070{
1071	struct schedule_instruction * list_ptr;
1072	for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1073		int score;
1074		s->CalcScore(list_ptr);
1075		score = list_ptr->Score;
1076		if (!*max_inst_out || score > *max_score) {
1077			*max_score = score;
1078			*max_inst_out = list_ptr;
1079			*list_out = list;
1080		}
1081	}
1082}
1083
1084static void emit_instruction(
1085	struct schedule_state * s,
1086	struct rc_instruction * before)
1087{
1088	int max_score = -1;
1089	struct schedule_instruction * max_inst = NULL;
1090	struct schedule_instruction ** max_list = NULL;
1091	unsigned tex_count = 0;
1092	struct schedule_instruction * tex_ptr;
1093
1094	pair_instructions(s);
1095#if VERBOSE
1096	fprintf(stderr, "Full:\n");
1097	print_list(s->ReadyFullALU);
1098	fprintf(stderr, "RGB:\n");
1099	print_list(s->ReadyRGB);
1100	fprintf(stderr, "Alpha:\n");
1101	print_list(s->ReadyAlpha);
1102	fprintf(stderr, "TEX:\n");
1103	print_list(s->ReadyTEX);
1104#endif
1105
1106	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1107		if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1108			emit_all_tex(s, before);
1109			return;
1110		}
1111		tex_count++;
1112	}
1113	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1114	update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1115	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1116
1117	if (tex_count >= s->max_tex_group || max_score == -1
1118		|| (s->TEXCount > 0 && tex_count == s->TEXCount)
1119		|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
1120		emit_all_tex(s, before);
1121	} else {
1122
1123
1124		remove_inst_from_list(max_list, max_inst);
1125		rc_insert_instruction(before->Prev, max_inst->Instruction);
1126		commit_alu_instruction(s, max_inst);
1127
1128		presub_nop(before->Prev);
1129	}
1130}
1131
1132static void add_tex_reader(
1133	struct schedule_state * s,
1134	struct schedule_instruction * writer,
1135	struct schedule_instruction * reader)
1136{
1137	if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1138		/*Not a TEX instructions */
1139		return;
1140	}
1141	reader->TexReadCount++;
1142	rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1143}
1144
1145static void scan_read(void * data, struct rc_instruction * inst,
1146		rc_register_file file, unsigned int index, unsigned int chan)
1147{
1148	struct schedule_state * s = data;
1149	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
1150	struct reg_value_reader * reader;
1151
1152	if (!v)
1153		return;
1154
1155	if (*v && (*v)->Writer == s->Current) {
1156		/* The instruction reads and writes to a register component.
1157		 * In this case, we only want to increment dependencies by one.
1158		 * Why?
1159		 * Because each instruction depends on the writers of its source
1160		 * registers _and_ the most recent writer of its destination
1161		 * register.  In this case, the current instruction (s->Current)
1162		 * has a dependency that both writes to one of its source
1163		 * registers and was the most recent writer to its destination
1164		 * register.  We have already marked this dependency in
1165		 * scan_write(), so we don't need to do it again.
1166		 */
1167
1168		/* We need to make sure we are adding s->Current to the
1169		 * previous writer's list of TexReaders, if the previous writer
1170		 * was a TEX instruction.
1171		 */
1172		add_tex_reader(s, s->PrevWriter[chan], s->Current);
1173
1174		return;
1175	}
1176
1177	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1178
1179	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1180	reader->Reader = s->Current;
1181	if (!*v) {
1182		/* In this situation, the instruction reads from a register
1183		 * that hasn't been written to or read from in the current
1184		 * block. */
1185		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1186		memset(*v, 0, sizeof(struct reg_value));
1187		(*v)->Readers = reader;
1188	} else {
1189		reader->Next = (*v)->Readers;
1190		(*v)->Readers = reader;
1191		/* Only update the current instruction's dependencies if the
1192		 * register it reads from has been written to in this block. */
1193		if ((*v)->Writer) {
1194			add_tex_reader(s, (*v)->Writer, s->Current);
1195			s->Current->NumDependencies++;
1196		}
1197	}
1198	(*v)->NumReaders++;
1199
1200	if (s->Current->NumReadValues >= 12) {
1201		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
1202	} else {
1203		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
1204	}
1205}
1206
1207static void scan_write(void * data, struct rc_instruction * inst,
1208		rc_register_file file, unsigned int index, unsigned int chan)
1209{
1210	struct schedule_state * s = data;
1211	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
1212	struct reg_value * newv;
1213
1214	if (!pv)
1215		return;
1216
1217	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1218
1219	newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1220	memset(newv, 0, sizeof(*newv));
1221
1222	newv->Writer = s->Current;
1223
1224	if (*pv) {
1225		(*pv)->Next = newv;
1226		s->Current->NumDependencies++;
1227		/* Keep track of the previous writer to s->Current's destination
1228		 * register */
1229		s->PrevWriter[chan] = (*pv)->Writer;
1230	}
1231
1232	*pv = newv;
1233
1234	if (s->Current->NumWriteValues >= 4) {
1235		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
1236	} else {
1237		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
1238	}
1239}
1240
1241static void is_rgb_to_alpha_possible_normal(
1242	void * userdata,
1243	struct rc_instruction * inst,
1244	struct rc_src_register * src)
1245{
1246	struct rc_reader_data * reader_data = userdata;
1247	reader_data->Abort = 1;
1248
1249}
1250
1251static void schedule_block(struct schedule_state * s,
1252		struct rc_instruction * begin, struct rc_instruction * end)
1253{
1254	unsigned int ip;
1255
1256	/* Scan instructions for data dependencies */
1257	ip = 0;
1258	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1259		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1260		memset(s->Current, 0, sizeof(struct schedule_instruction));
1261
1262		if (inst->Type == RC_INSTRUCTION_NORMAL) {
1263			const struct rc_opcode_info * info =
1264					rc_get_opcode_info(inst->U.I.Opcode);
1265			if (info->HasTexture) {
1266				s->TEXCount++;
1267			}
1268		}
1269
1270		/* XXX: This causes SemWait to be set for all instructions in
1271		 * a block if the previous block contained a TEX instruction.
1272		 * We can do better here, but it will take a lot of work. */
1273		if (s->PrevBlockHasTex) {
1274			s->Current->TexReadCount = 1;
1275		}
1276
1277		s->Current->Instruction = inst;
1278		inst->IP = ip++;
1279
1280		DBG("%i: Scanning\n", inst->IP);
1281
1282		/* The order of things here is subtle and maybe slightly
1283		 * counter-intuitive, to account for the case where an
1284		 * instruction writes to the same register as it reads
1285		 * from. */
1286		rc_for_all_writes_chan(inst, &scan_write, s);
1287		rc_for_all_reads_chan(inst, &scan_read, s);
1288
1289		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1290
1291		if (!s->Current->NumDependencies) {
1292			instruction_ready(s, s->Current);
1293		}
1294
1295		/* Get global readers for possible RGB->Alpha conversion. */
1296		s->Current->GlobalReaders.ExitOnAbort = 1;
1297		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1298				is_rgb_to_alpha_possible_normal,
1299				is_rgb_to_alpha_possible, NULL);
1300	}
1301
1302	/* Temporarily unlink all instructions */
1303	begin->Prev->Next = end;
1304	end->Prev = begin->Prev;
1305
1306	/* Schedule instructions back */
1307	while(!s->C->Error &&
1308	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1309		emit_instruction(s, end);
1310	}
1311}
1312
1313static int is_controlflow(struct rc_instruction * inst)
1314{
1315	if (inst->Type == RC_INSTRUCTION_NORMAL) {
1316		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1317		return opcode->IsFlowControl;
1318	}
1319	return 0;
1320}
1321
1322void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1323{
1324	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1325	struct schedule_state s;
1326	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1327	unsigned int * opt = user;
1328
1329	memset(&s, 0, sizeof(s));
1330	s.Opt = *opt;
1331	s.C = &c->Base;
1332	if (s.C->is_r500) {
1333		s.CalcScore = calc_score_readers;
1334	} else {
1335		s.CalcScore = calc_score_r300;
1336	}
1337	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1338	while(inst != &c->Base.Program.Instructions) {
1339		struct rc_instruction * first;
1340
1341		if (is_controlflow(inst)) {
1342			inst = inst->Next;
1343			continue;
1344		}
1345
1346		first = inst;
1347
1348		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1349			inst = inst->Next;
1350
1351		DBG("Schedule one block\n");
1352		memset(s.Temporary, 0, sizeof(s.Temporary));
1353		s.TEXCount = 0;
1354		schedule_block(&s, first, inst);
1355		if (s.PendingTEX) {
1356			s.PrevBlockHasTex = 1;
1357		}
1358	}
1359}
1360