1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/*
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright (C) 2009 Nicolai Haehnle.
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * All Rights Reserved.
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a copy of this software and associated documentation files (the
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Software"), to deal in the Software without restriction, including
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * without limitation the rights to use, copy, modify, merge, publish,
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * distribute, sublicense, and/or sell copies of the Software, and to
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * permit persons to whom the Software is furnished to do so, subject to
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the following conditions:
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * next paragraph) shall be included in all copies or substantial
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * portions of the Software.
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_program_pair.h"
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <stdio.h>
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler.h"
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler_util.h"
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_dataflow.h"
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_list.h"
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_variable.h"
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_debug.h"
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/* Set to a non-zero value to enable scheduler tracing on stderr; this also
 * compiles in the print_list() debug helper. */
#define VERBOSE 0

/* printf-style trace helper: the arguments are forwarded to
 * fprintf(stderr, ...) only when VERBOSE is non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct schedule_instruction {
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * Instruction;
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** Next instruction in the linked list of ready instructions. */
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *NextReady;
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** Values that this instruction reads and writes */
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct reg_value * WriteValues[4];
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct reg_value * ReadValues[12];
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int NumWriteValues:3;
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int NumReadValues:4;
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/**
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * Number of (read and write) dependencies that must be resolved before
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * this instruction can be scheduled.
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 */
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int NumDependencies:5;
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** List of all readers (see rc_get_readers() for the definition of
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * "all readers"), even those outside the basic block this instruction
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * lives in. */
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data GlobalReaders;
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** If the scheduler has paired an RGB and an Alpha instruction together,
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * PairedInst references the alpha insturction's dependency information.
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 */
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * PairedInst;
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** This scheduler uses the value of Score to determine which
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * instruction to schedule.  Instructions with a higher value of Score
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * will be scheduled first. */
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int Score;
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** The number of components that read from a TEX instruction. */
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned TexReadCount;
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** For TEX instructions a list of readers */
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_list * TexReaders;
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/**
 * One node of the linked list recording which instructions read a value.
 */
struct reg_value_reader {
	/** Instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Following node; NULL terminates the list. */
	struct reg_value_reader *Next;
};
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/**
 * Tracks one value stored in a single component of an
 * RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** Instruction that writes this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions that read this value.
	 * Once the value becomes available, the dependency count of every
	 * reader is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value; it drops by one each time a
	 * reader of the value is committed.  When the reader count reaches
	 * zero, the dependency count of the instruction writing \ref Next
	 * is decremented.
	 */
	unsigned int NumReaders;

	/** The next value to be written to the same register. */
	struct reg_value *Next;
};
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/**
 * Value slots of one register, one per channel (the array is indexed by
 * channel in get_reg_valuep()).
 */
struct register_state {
	struct reg_value *Values[4];
};
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct remap_reg {
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruciont * Inst;
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int OldSwizzle:3;
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int NewSwizzle:3;
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int OnlyTexReads:1;
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct remap_reg * Next;
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct schedule_state {
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct radeon_compiler * C;
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * Current;
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/** Array of the previous writers of Current's destination register
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * indexed by channel. */
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * PrevWriter[4];
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct register_state Temporary[RC_REGISTER_MAX_INDEX];
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/**
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * Linked lists of instructions that can be scheduled right now,
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * based on which ALU/TEX resources they require.
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 */
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/*@{*/
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *ReadyFullALU;
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *ReadyRGB;
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *ReadyAlpha;
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *ReadyTEX;
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/*@}*/
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_list *PendingTEX;
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void (*CalcScore)(struct schedule_instruction *);
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	long max_tex_group;
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned PrevBlockHasTex:1;
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned TEXCount;
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned Opt:1;
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic struct reg_value ** get_reg_valuep(struct schedule_state * s,
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_register_file file, unsigned int index, unsigned int chan)
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (file != RC_FILE_TEMPORARY)
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (index >= RC_REGISTER_MAX_INDEX) {
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return &s->Temporary[index].Values[chan];
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic unsigned get_tex_read_count(struct schedule_instruction * sinst)
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned tex_read_count = sinst->TexReadCount;
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sinst->PairedInst) {
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		tex_read_count += sinst->PairedInst->TexReadCount;
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return tex_read_count;
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
#if VERBOSE
/**
 * Debug helper (only built when VERBOSE is non-zero): dump a ready list to
 * stderr as a sequence of "IP (score) [tex read count]," entries followed
 * by a newline.
 *
 * \param sinst head of the list; entries are followed through NextReady
 */
static void print_list(struct schedule_instruction * sinst)
{
	struct schedule_instruction * ptr;
	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
		unsigned tex_read_count = get_tex_read_count(ptr);
		/* Report the score of the entry being printed rather than the
		 * score of the list head, and keep it signed to match both the
		 * Score field and the %d conversion. */
		int score = ptr->Score;
		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
						tex_read_count);
	}
	fprintf(stderr, "\n");
}
#endif
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void remove_inst_from_list(struct schedule_instruction ** list,
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					struct schedule_instruction * inst)
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * prev = NULL;
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * list_ptr;
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (list_ptr = *list; list_ptr; prev = list_ptr,
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					list_ptr = list_ptr->NextReady) {
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (list_ptr == inst) {
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (prev) {
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				prev->NextReady = inst->NextReady;
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			} else {
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				*list = inst->NextReady;
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst->NextReady = NULL;
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			break;
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst->NextReady = *list;
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	*list = inst;
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void add_inst_to_list_score(struct schedule_instruction ** list,
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					struct schedule_instruction * inst)
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * temp;
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * prev;
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!*list) {
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		*list = inst;
229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	temp = *list;
232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	prev = NULL;
233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(temp && inst->Score <= temp->Score) {
234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		prev = temp;
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		temp = temp->NextReady;
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!prev) {
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst->NextReady = temp;
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		*list = inst;
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		prev->NextReady = inst;
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst->NextReady = temp;
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	DBG("%i is now ready\n", sinst->Instruction->IP);
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Adding Ready TEX instructions to the end of the "Ready List" helps
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * us emit TEX instructions in blocks without losing our place. */
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		add_inst_to_list_score(&s->ReadyTEX, sinst);
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		add_inst_to_list_score(&s->ReadyRGB, sinst);
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		add_inst_to_list_score(&s->ReadyAlpha, sinst);
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	else
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		add_inst_to_list_score(&s->ReadyFullALU, sinst);
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(sinst->NumDependencies > 0);
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	sinst->NumDependencies--;
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!sinst->NumDependencies)
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		instruction_ready(s, sinst);
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* These functions provide different heuristics for scheduling instructions.
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The default is calc_score_readers. */
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
#if 0

/* Disabled heuristic: every instruction gets the same score. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}

/* Disabled heuristic: score an instruction by the outstanding dependencies
 * of the readers of the values it writes, with a bonus of 100 for each
 * reader that has exactly one dependency left. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int i;
	sinst->Score = 0;
	for (i = 0; i < sinst->NumWriteValues; i++) {
		struct reg_value * v = sinst->WriteValues[i];
		struct reg_value_reader * r;

		if (!v->NumReaders)
			continue;

		for (r = v->Readers; r; r = r->Next) {
			if (r->Reader->NumDependencies == 1)
				sinst->Score += 100;
			sinst->Score += r->Reader->NumDependencies;
		}
	}
}

#endif
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define NO_OUTPUT_SCORE (1 << 24)
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void score_no_output(struct schedule_instruction * sinst)
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (sinst->PairedInst) {
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (!sinst->PairedInst->Instruction->U.P.
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org							RGB.OutputWriteMask
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					&& !sinst->PairedInst->Instruction->U.P.
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org							Alpha.OutputWriteMask) {
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				sinst->Score |= NO_OUTPUT_SCORE;
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else {
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			sinst->Score |= NO_OUTPUT_SCORE;
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define PAIRED_SCORE (1 << 16)
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void calc_score_r300(struct schedule_instruction * sinst)
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned src_idx;
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sinst->Score = 0;
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	score_no_output(sinst);
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sinst->PairedInst) {
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sinst->Score |= PAIRED_SCORE;
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (src_idx = 0; src_idx < 4; src_idx++) {
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define NO_READ_TEX_SCORE (1 << 16)
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void calc_score_readers(struct schedule_instruction * sinst)
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sinst->Score = 0;
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sinst->Score = sinst->NumReadValues;
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (sinst->PairedInst) {
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			sinst->Score += sinst->PairedInst->NumReadValues;
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (get_tex_read_count(sinst) == 0) {
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			sinst->Score |= NO_READ_TEX_SCORE;
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		score_no_output(sinst);
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This function decreases the dependencies of the next instruction that
366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * wants to write to each of sinst's read values.
367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void commit_update_reads(struct schedule_state * s,
369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					struct schedule_instruction * sinst){
370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(i = 0; i < sinst->NumReadValues; ++i) {
372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct reg_value * v = sinst->ReadValues[i];
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		assert(v->NumReaders > 0);
374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		v->NumReaders--;
375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!v->NumReaders) {
376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (v->Next) {
377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				decrease_dependencies(s, v->Next->Writer);
378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sinst->PairedInst) {
382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		commit_update_reads(s, sinst->PairedInst);
383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void commit_update_writes(struct schedule_state * s,
387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					struct schedule_instruction * sinst){
388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(i = 0; i < sinst->NumWriteValues; ++i) {
390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct reg_value * v = sinst->WriteValues[i];
391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (v->NumReaders) {
392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				decrease_dependencies(s, r->Reader);
394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else {
396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			/* This happens in instruction sequences of the type
397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 *  OP r.x, ...;
398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 *  OP r.x, r.x, ...;
399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 * See also the subtlety in how instructions that both
400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 * read and write the same register are scanned.
401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 */
402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (v->Next)
403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				decrease_dependencies(s, v->Next->Writer);
404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sinst->PairedInst) {
407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		commit_update_writes(s, sinst->PairedInst);
408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void notify_sem_wait(struct schedule_state *s)
412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_list * pend_ptr;
414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_list * read_ptr;
416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct schedule_instruction * pending = pend_ptr->Item;
417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (read_ptr = pending->TexReaders; read_ptr;
418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						read_ptr = read_ptr->Next) {
419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			struct schedule_instruction * reader = read_ptr->Item;
420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			reader->TexReadCount--;
421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	s->PendingTEX = NULL;
424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	commit_update_reads(s, sinst);
431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	commit_update_writes(s, sinst);
433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (get_tex_read_count(sinst) > 0) {
435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		sinst->Instruction->U.P.SemWait = 1;
436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		notify_sem_wait(s);
437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Emit all ready texture instructions in a single block.
442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Emit as a single block to (hopefully) sample many textures in parallel,
444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and to avoid hardware indirections on R300.
445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *readytex;
449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst_begin;
450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(s->ReadyTEX);
452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	notify_sem_wait(s);
453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Node marker for R300 */
455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Link texture instructions back in */
459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	readytex = s->ReadyTEX;
460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(readytex) {
461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_insert_instruction(before->Prev, readytex->Instruction);
462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* All of the TEX instructions in the same TEX block have
465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * their source registers read from before any of the
466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * instructions in that block write to their destination
467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * registers.  This means that when we commit a TEX
468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * instruction, any other TEX instruction that wants to write
469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * to one of the committed instruction's source register can be
470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * marked as ready and should be emitted in the same TEX
471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * block. This prevents the following sequence from being
472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * emitted in two different TEX blocks:
473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 */
476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		commit_update_reads(s, readytex);
477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		readytex = readytex->NextReady;
478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	readytex = s->ReadyTEX;
480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	s->ReadyTEX = 0;
481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(readytex){
482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		commit_update_writes(s, readytex);
484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* Set semaphore bits for last TEX instruction in the block */
485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!readytex->NextReady) {
486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			readytex->Instruction->U.I.TexSemAcquire = 1;
487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			readytex->Instruction->U.I.TexSemWait = 1;
488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		readytex = readytex->NextReady;
491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* This is a helper function for destructive_merge_instructions().  It helps
495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * merge presubtract sources from two instructions and makes sure the
496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * presubtract sources end up in the correct spot.  This function assumes that
497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * but no scalar instruction (alpha).
499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return 0 if merging the presubtract sources fails.
500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @retrun 1 if merging the presubtract sources succeeds.
501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int merge_presub_sources(
503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_instruction * dst_full,
504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_sub_instruction src,
505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int type)
506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_sub_instruction * dst_sub;
509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	const struct rc_opcode_info * info;
510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	switch(type) {
514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_SOURCE_RGB:
515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		is_rgb = 1;
516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		is_alpha = 0;
517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		dst_sub = &dst_full->RGB;
518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		break;
519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_SOURCE_ALPHA:
520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		is_rgb = 0;
521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		is_alpha = 1;
522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		dst_sub = &dst_full->Alpha;
523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		break;
524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	default:
525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		assert(0);
526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	info = rc_get_opcode_info(dst_full->RGB.Opcode);
530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	srcp_regs = rc_presubtract_src_reg_count(
535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					src.Src[RC_PAIR_PRESUB_SRC].Index);
536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int arg;
538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		int free_source;
539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int one_way = 0;
540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_pair_instruction_source temp;
542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org							srcp.File, srcp.Index);
545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* If free_source < 0 then there are no free source
547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * slots. */
548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (free_source < 0)
549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		temp = dst_sub->Src[srcp_src];
552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* srcp needs src0 and src1 to be the same */
555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (free_source < srcp_src) {
556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (!temp.Used)
557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				continue;
558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			free_source = rc_pair_alloc_source(dst_full, is_rgb,
559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					is_alpha, temp.File, temp.Index);
560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (free_source < 0)
561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return 0;
562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			one_way = 1;
563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else {
564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			dst_sub->Src[free_source] = temp;
565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* If free_source == srcp_src, then the presubtract
568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * source is already in the correct place. */
569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (free_source == srcp_src)
570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* Shuffle the sources, so we can put the
573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * presubtract source in the correct place. */
574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for(arg = 0; arg < info->NumSrcRegs; arg++) {
575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			/*If this arg does not read from an rgb source,
576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 * do nothing. */
577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org								& type)) {
579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				continue;
580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (dst_full->RGB.Arg[arg].Source == srcp_src)
583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				dst_full->RGB.Arg[arg].Source = free_source;
584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			/* We need to do this just in case register
585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 * is one of the sources already, but in the
586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 * wrong spot. */
587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			else if(dst_full->RGB.Arg[arg].Source == free_source
588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org							&& !one_way) {
589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				dst_full->RGB.Arg[arg].Source = srcp_src;
590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 1;
594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* This function assumes that rgb.Alpha and alpha.RGB are unused */
598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int destructive_merge_instructions(
599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_pair_instruction * rgb,
600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_pair_instruction * alpha)
601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	const struct rc_opcode_info * opcode;
603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Presubtract registers need to be merged first so that registers
608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * needed by the presubtract operation can be placed in src0 and/or
609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * src1. */
610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Merge the rgb presubtract registers. */
612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Merge the alpha presubtract registers */
618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Copy alpha args into rgb */
625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int srcrgb = 0;
629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int srcalpha = 0;
630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_register_file file = 0;
632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int index = 0;
633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		int source;
634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			srcrgb = 1;
637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			file = alpha->RGB.Src[oldsrc].File;
638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			index = alpha->RGB.Src[oldsrc].Index;
639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			srcalpha = 1;
641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			file = alpha->Alpha.Src[oldsrc].File;
642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			index = alpha->Alpha.Src[oldsrc].Index;
643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (source < 0)
647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb->Alpha.Arg[arg].Source = source;
650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Copy alpha opcode into rgb */
656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->Alpha.Omod = alpha->Alpha.Omod;
663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Merge ALU result writing */
665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (alpha->WriteALUResult) {
666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (rgb->WriteALUResult)
667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return 0;
668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb->WriteALUResult = alpha->WriteALUResult;
670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb->ALUResultCompare = alpha->ALUResultCompare;
671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Copy SemWait */
674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb->SemWait |= alpha->SemWait;
675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 1;
677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Try to merge the given instructions into the rgb instructions.
681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Return true on success; on failure, return false, and keep
683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the instructions untouched.
684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_instruction backup;
688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/*Instructions can't write output registers and ALU result at the
690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * same time. */
691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Writing output registers in the middle of shaders is slow, so
697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * we don't want to pair output writes with temp writes. */
698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (destructive_merge_instructions(rgb, alpha))
706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 1;
707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 0;
710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_nop(struct rc_instruction * emitted) {
713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int prev_rgb_index, prev_alpha_index, i, num_src;
714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* We don't need a nop if the previous instruction is a TEX. */
716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (emitted->Prev->U.P.RGB.WriteMask)
720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	else
722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		prev_rgb_index = -1;
723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (emitted->Prev->U.P.Alpha.WriteMask)
724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	else
726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		prev_alpha_index = 1;
727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Check the previous rgb instruction */
729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		num_src = rc_presubtract_src_reg_count(
731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (i = 0; i < num_src; i++) {
733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			unsigned int index = emitted->U.P.RGB.Src[i].Index;
734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			    && (index  == prev_rgb_index
736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				|| index == prev_alpha_index)) {
737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				emitted->Prev->U.P.Nop = 1;
738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return;
739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Check the previous alpha instruction. */
744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	num_src = rc_presubtract_src_reg_count(
748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < num_src; i++) {
750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		   && (index == prev_rgb_index || index == prev_alpha_index)) {
753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			emitted->Prev->U.P.Nop = 1;
754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void rgb_to_alpha_remap (
760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_instruction_arg * arg,
762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_register_file old_file,
763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_swizzle old_swz,
764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int new_index)
765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int new_src_index;
767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < 3; i++) {
770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (get_swz(arg->Swizzle, i) == old_swz) {
771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org							old_file, new_index);
776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* This conversion is not possible, we must have made a mistake in
777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * is_rgb_to_alpha_possible. */
778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (new_src_index < 0) {
779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		assert(0);
780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	arg->Source = new_src_index;
784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int can_remap(unsigned int opcode)
787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	switch(opcode) {
789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DDX:
790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DDY:
791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	default:
793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 1;
794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int can_convert_opcode_to_alpha(unsigned int opcode)
798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	switch(opcode) {
800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DDX:
801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DDY:
802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DP2:
803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DP3:
804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DP4:
805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	case RC_OPCODE_DPH:
806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	default:
808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 1;
809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void is_rgb_to_alpha_possible(
813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void * userdata,
814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_instruction_arg * arg,
816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_instruction_source * src)
817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int read_chan = RC_SWIZZLE_UNUSED;
819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int alpha_sources = 0;
820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data * reader_data = userdata;
822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!can_remap(inst->U.P.RGB.Opcode)
824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    || !can_remap(inst->U.P.Alpha.Opcode)) {
825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader_data->Abort = 1;
826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!src)
830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* XXX There are some cases where we can still do the conversion if
833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * a reader reads from a presubtract source, but for now we'll prevent
834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * it. */
835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (arg->Source == RC_PAIR_PRESUB_SRC) {
836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader_data->Abort = 1;
837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Make sure the source only reads the register component that we
841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * are going to be convering from.  It is OK if the instruction uses
842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * this component more than once.
843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * XXX If the index we will be converting to is the same as the
844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * current index, then it is OK to read from more than one component.
845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 */
846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < 3; i++) {
847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_swizzle swz = get_swz(arg->Swizzle, i);
848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		switch(swz) {
849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		case RC_SWIZZLE_X:
850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		case RC_SWIZZLE_Y:
851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		case RC_SWIZZLE_Z:
852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		case RC_SWIZZLE_W:
853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (read_chan == RC_SWIZZLE_UNUSED) {
854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				read_chan = swz;
855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			} else if (read_chan != swz) {
856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				reader_data->Abort = 1;
857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				return;
858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			break;
860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		default:
861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			break;
862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Make sure there are enough alpha sources.
866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * XXX If we know what register all the readers are going
867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * to be remapped to, then in some situations we can still do
868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * the subsitution, even if all 3 alpha sources are being used.*/
869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < 3; i++) {
870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (inst->U.P.Alpha.Src[i].Used) {
871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			alpha_sources++;
872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (alpha_sources > 2) {
875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader_data->Abort = 1;
876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int convert_rgb_to_alpha(
881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state * s,
882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * sched_inst)
883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int old_mask = pair_inst->RGB.WriteMask;
886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	const struct rc_opcode_info * info =
888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				rc_get_opcode_info(pair_inst->RGB.Opcode);
889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int new_index = -1;
890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int i;
891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (sched_inst->GlobalReaders.Abort)
893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!pair_inst->RGB.WriteMask)
896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	assert(sched_inst->NumWriteValues == 1);
904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!sched_inst->WriteValues[0]) {
906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		assert(0);
907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* We start at the old index, because if we can reuse the same
911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * register and just change the swizzle then it is more likely we
912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * will be able to convert all the readers. */
913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct reg_value ** new_regvalp = get_reg_valuep(
915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						s, RC_FILE_TEMPORARY, i, 3);
916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!*new_regvalp) {
917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			struct reg_value ** old_regvalp =
918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				get_reg_valuep(s,
919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					RC_FILE_TEMPORARY,
920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					pair_inst->RGB.DestIndex,
921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					rc_mask_to_swizzle(old_mask));
922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			new_index = i;
923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*new_regvalp = *old_regvalp;
924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*old_regvalp = NULL;
925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			break;
927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (new_index < 0) {
930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return 0;
931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * as the RGB opcode, then the Alpha instruction will already contain
935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * the correct opcode and instruction args, so we do not want to
936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * overwrite them.
937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 */
938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						sizeof(pair_inst->Alpha.Arg));
942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->Alpha.DestIndex = new_index;
944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->Alpha.WriteMask = RC_MASK_W;
945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->Alpha.Target = pair_inst->RGB.Target;
946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Move the swizzles into the first chan */
951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (i = 0; i < info->NumSrcRegs; i++) {
952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		unsigned int j;
953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		for (j = 0; j < 3; j++) {
954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (swz != RC_SWIZZLE_UNUSED) {
956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				pair_inst->Alpha.Arg[i].Swizzle =
957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org							rc_init_swizzle(swz, 1);
958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				break;
959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->RGB.DestIndex = 0;
964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->RGB.WriteMask = 0;
965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->RGB.Target = 0;
966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->RGB.OutputWriteMask = 0;
967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->RGB.DepthWriteMask = 0;
968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_inst->RGB.Saturate = 0;
969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					RC_FILE_TEMPORARY, old_swz, new_index);
975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 1;
977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void try_convert_and_pair(
980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state *s,
981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction ** inst_list)
982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * list_ptr = *inst_list;
984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while (list_ptr && *inst_list && (*inst_list)->NextReady) {
985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		int paired = 0;
986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			&& list_ptr->Instruction->U.P.RGB.Opcode
988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						!= RC_OPCODE_REPL_ALPHA) {
989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				goto next;
990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (list_ptr->NumWriteValues == 1
992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					&& convert_rgb_to_alpha(s, list_ptr)) {
993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			struct schedule_instruction * pair_ptr;
995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			remove_inst_from_list(inst_list, list_ptr);
996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			for (pair_ptr = s->ReadyRGB; pair_ptr;
999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					pair_ptr = pair_ptr->NextReady) {
1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				if (merge_instructions(&pair_ptr->Instruction->U.P,
1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						&list_ptr->Instruction->U.P)) {
1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					remove_inst_from_list(&s->ReadyAlpha, list_ptr);
1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					pair_ptr->PairedInst = list_ptr;
1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					add_inst_to_list(&s->ReadyFullALU, pair_ptr);
1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					list_ptr = *inst_list;
1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					paired = 1;
1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					break;
1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				}
1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!paired) {
1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnext:
1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			list_ptr = list_ptr->NextReady;
1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This function attempts to merge RGB and Alpha instructions together.
1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void pair_instructions(struct schedule_state * s)
1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *rgb_ptr;
1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *alpha_ptr;
1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Some pairings might fail because they require too
1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * many source slots; try all possible pairings if necessary */
1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rgb_ptr = s->ReadyRGB;
1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(rgb_ptr) {
1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		alpha_ptr = s->ReadyAlpha;
1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		while(alpha_ptr) {
1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				/* Remove RGB and Alpha from their ready lists.
1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				 */
1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				rgb_ptr->PairedInst = alpha_ptr;
1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				break;
1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			alpha_ptr = alpha_next;
1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rgb_ptr = rgb_next;
1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!s->Opt) {
1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * slot can be converted into Alpha instructions. */
1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	try_convert_and_pair(s, &s->ReadyFullALU);
1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Try to convert some of the RGB instructions to Alpha and
1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 * try to pair it with another RGB. */
1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	try_convert_and_pair(s, &s->ReadyRGB);
1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void update_max_score(
1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state * s,
1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction ** list,
1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int * max_score,
1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction ** max_inst_out,
1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction *** list_out)
1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * list_ptr;
1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		int score;
1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->CalcScore(list_ptr);
1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		score = list_ptr->Score;
1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!*max_inst_out || score > *max_score) {
1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*max_score = score;
1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*max_inst_out = list_ptr;
1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			*list_out = list;
1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void emit_instruction(
1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state * s,
1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * before)
1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	int max_score = -1;
1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * max_inst = NULL;
1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction ** max_list = NULL;
1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned tex_count = 0;
1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * tex_ptr;
1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	pair_instructions(s);
1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if VERBOSE
1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	fprintf(stderr, "Full:\n");
1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	print_list(s->ReadyFullALU);
1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	fprintf(stderr, "RGB:\n");
1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	print_list(s->ReadyRGB);
1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	fprintf(stderr, "Alpha:\n");
1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	print_list(s->ReadyAlpha);
1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	fprintf(stderr, "TEX:\n");
1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	print_list(s->ReadyTEX);
1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif
1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			emit_all_tex(s, before);
1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			return;
1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		tex_count++;
1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (tex_count >= s->max_tex_group || max_score == -1
1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		|| (s->TEXCount > 0 && tex_count == s->TEXCount)
1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		emit_all_tex(s, before);
1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		remove_inst_from_list(max_list, max_inst);
1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_insert_instruction(before->Prev, max_inst->Instruction);
1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		commit_alu_instruction(s, max_inst);
1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		presub_nop(before->Prev);
1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void add_tex_reader(
1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state * s,
1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * writer,
1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_instruction * reader)
1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/*Not a TEX instructions */
1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	reader->TexReadCount++;
1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void scan_read(void * data, struct rc_instruction * inst,
1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_register_file file, unsigned int index, unsigned int chan)
1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state * s = data;
1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct reg_value_reader * reader;
1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!v)
1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (*v && (*v)->Writer == s->Current) {
1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* The instruction reads and writes to a register component.
1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * In this case, we only want to increment dependencies by one.
1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * Why?
1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * Because each instruction depends on the writers of its source
1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * registers _and_ the most recent writer of its destination
1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * register.  In this case, the current instruction (s->Current)
1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * has a dependency that both writes to one of its source
1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * registers and was the most recent writer to its destination
1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * register.  We have already marked this dependency in
1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * scan_write(), so we don't need to do it again.
1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 */
1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* We need to make sure we are adding s->Current to the
1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * previous writer's list of TexReaders, if the previous writer
1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * was a TEX instruction.
1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 */
1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		add_tex_reader(s, s->PrevWriter[chan], s->Current);
1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	reader->Reader = s->Current;
1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!*v) {
1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* In this situation, the instruction reads from a register
1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * that hasn't been written to or read from in the current
1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * block. */
1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		memset(*v, 0, sizeof(struct reg_value));
1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		(*v)->Readers = reader;
1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		reader->Next = (*v)->Readers;
1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		(*v)->Readers = reader;
1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* Only update the current instruction's dependencies if the
1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * register it reads from has been written to in this block. */
1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if ((*v)->Writer) {
1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			add_tex_reader(s, (*v)->Writer, s->Current);
1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			s->Current->NumDependencies++;
1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	(*v)->NumReaders++;
1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (s->Current->NumReadValues >= 12) {
1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void scan_write(void * data, struct rc_instruction * inst,
1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_register_file file, unsigned int index, unsigned int chan)
1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state * s = data;
1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct reg_value * newv;
1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (!pv)
1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return;
1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	memset(newv, 0, sizeof(*newv));
1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	newv->Writer = s->Current;
1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (*pv) {
1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		(*pv)->Next = newv;
1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->Current->NumDependencies++;
1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* Keep track of the previous writer to s->Current's destination
1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * register */
1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->PrevWriter[chan] = (*pv)->Writer;
1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	*pv = newv;
1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (s->Current->NumWriteValues >= 4) {
1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
1238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void is_rgb_to_alpha_possible_normal(
1242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	void * userdata,
1243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst,
1244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_src_register * src)
1245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_reader_data * reader_data = userdata;
1247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	reader_data->Abort = 1;
1248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void schedule_block(struct schedule_state * s,
1252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_instruction * begin, struct rc_instruction * end)
1253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int ip;
1255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Scan instructions for data dependencies */
1257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	ip = 0;
1258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		memset(s->Current, 0, sizeof(struct schedule_instruction));
1261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (inst->Type == RC_INSTRUCTION_NORMAL) {
1263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			const struct rc_opcode_info * info =
1264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					rc_get_opcode_info(inst->U.I.Opcode);
1265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			if (info->HasTexture) {
1266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				s->TEXCount++;
1267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			}
1268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* XXX: This causes SemWait to be set for all instructions in
1271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * a block if the previous block contained a TEX instruction.
1272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * We can do better here, but it will take a lot of work. */
1273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (s->PrevBlockHasTex) {
1274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			s->Current->TexReadCount = 1;
1275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->Current->Instruction = inst;
1278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		inst->IP = ip++;
1279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		DBG("%i: Scanning\n", inst->IP);
1281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* The order of things here is subtle and maybe slightly
1283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * counter-intuitive, to account for the case where an
1284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * instruction writes to the same register as it reads
1285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		 * from. */
1286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_for_all_writes_chan(inst, &scan_write, s);
1287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_for_all_reads_chan(inst, &scan_read, s);
1288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (!s->Current->NumDependencies) {
1292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			instruction_ready(s, s->Current);
1293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		/* Get global readers for possible RGB->Alpha conversion. */
1296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s->Current->GlobalReaders.ExitOnAbort = 1;
1297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				is_rgb_to_alpha_possible_normal,
1299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				is_rgb_to_alpha_possible, NULL);
1300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Temporarily unlink all instructions */
1303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	begin->Prev->Next = end;
1304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	end->Prev = begin->Prev;
1305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	/* Schedule instructions back */
1307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(!s->C->Error &&
1308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		emit_instruction(s, end);
1310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int is_controlflow(struct rc_instruction * inst)
1314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (inst->Type == RC_INSTRUCTION_NORMAL) {
1316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		return opcode->IsFlowControl;
1318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	return 0;
1320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid rc_pair_schedule(struct radeon_compiler *cc, void *user)
1323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
1324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct schedule_state s;
1326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	unsigned int * opt = user;
1328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	memset(&s, 0, sizeof(s));
1330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	s.Opt = *opt;
1331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	s.C = &c->Base;
1332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	if (s.C->is_r500) {
1333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s.CalcScore = calc_score_readers;
1334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	} else {
1335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s.CalcScore = calc_score_r300;
1336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	while(inst != &c->Base.Program.Instructions) {
1339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		struct rc_instruction * first;
1340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (is_controlflow(inst)) {
1342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst = inst->Next;
1343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			continue;
1344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		first = inst;
1347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			inst = inst->Next;
1350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
1351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		DBG("Schedule one block\n");
1352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		memset(s.Temporary, 0, sizeof(s.Temporary));
1353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		s.TEXCount = 0;
1354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		schedule_block(&s, first, inst);
1355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		if (s.PendingTEX) {
1356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			s.PrevBlockHasTex = 1;
1357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		}
1358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	}
1359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
1360