1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright (C) 2009 Nicolai Haehnle. 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * All Rights Reserved. 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a copy of this software and associated documentation files (the 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Software"), to deal in the Software without restriction, including 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * without limitation the rights to use, copy, modify, merge, publish, 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * distribute, sublicense, and/or sell copies of the Software, and to 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * permit persons to whom the Software is furnished to do so, subject to 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the following conditions: 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * next paragraph) shall be included in all copies or substantial 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * portions of the Software. 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_program_pair.h" 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <stdio.h> 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler.h" 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_compiler_util.h" 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_dataflow.h" 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_list.h" 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "radeon_variable.h" 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_debug.h" 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define VERBOSE 0 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct schedule_instruction { 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * Instruction; 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** Next instruction in the linked list of ready instructions. */ 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *NextReady; 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** Values that this instruction reads and writes */ 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value * WriteValues[4]; 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value * ReadValues[12]; 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int NumWriteValues:3; 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int NumReadValues:4; 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Number of (read and write) dependencies that must be resolved before 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * this instruction can be scheduled. 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int NumDependencies:5; 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** List of all readers (see rc_get_readers() for the definition of 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "all readers"), even those outside the basic block this instruction 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * lives in. */ 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data GlobalReaders; 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** If the scheduler has paired an RGB and an Alpha instruction together, 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * PairedInst references the alpha insturction's dependency information. 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * PairedInst; 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** This scheduler uses the value of Score to determine which 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instruction to schedule. Instructions with a higher value of Score 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * will be scheduled first. */ 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int Score; 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** The number of components that read from a TEX instruction. */ 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned TexReadCount; 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** For TEX instructions a list of readers */ 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_list * TexReaders; 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Used to keep track of which instructions read a value. 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct reg_value_reader { 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *Reader; 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value_reader *Next; 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Used to keep track which values are stored in each component of a 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * RC_FILE_TEMPORARY. 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct reg_value { 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * Writer; 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Unordered linked list of instructions that read from this value. 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * When this value becomes available, we increase all readers' 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * dependency count. 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value_reader *Readers; 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Number of readers of this value. This is decremented each time 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a reader of the value is committed. 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * When the reader cound reaches zero, the dependency count 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the instruction writing \ref Next is decremented. 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int NumReaders; 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct register_state { 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value * Values[4]; 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct remap_reg { 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruciont * Inst; 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int OldSwizzle:3; 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int NewSwizzle:3; 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int OnlyTexReads:1; 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct remap_reg * Next; 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct schedule_state { 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct radeon_compiler * C; 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * Current; 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** Array of the previous writers of Current's destination register 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * indexed by channel. */ 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * PrevWriter[4]; 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct register_state Temporary[RC_REGISTER_MAX_INDEX]; 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Linked lists of instructions that can be scheduled right now, 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * based on which ALU/TEX resources they require. 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /*@{*/ 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *ReadyFullALU; 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *ReadyRGB; 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *ReadyAlpha; 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *ReadyTEX; 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /*@}*/ 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_list *PendingTEX; 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void (*CalcScore)(struct schedule_instruction *); 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org long max_tex_group; 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned PrevBlockHasTex:1; 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned TEXCount; 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned Opt:1; 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic struct reg_value ** get_reg_valuep(struct schedule_state * s, 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, unsigned int index, unsigned int chan) 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (file != RC_FILE_TEMPORARY) 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (index >= RC_REGISTER_MAX_INDEX) { 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return &s->Temporary[index].Values[chan]; 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic unsigned get_tex_read_count(struct schedule_instruction * sinst) 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tex_read_count = sinst->TexReadCount; 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->PairedInst) { 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tex_read_count += sinst->PairedInst->TexReadCount; 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return tex_read_count; 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if VERBOSE 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void print_list(struct schedule_instruction * sinst) 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * ptr; 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (ptr = sinst; ptr; ptr=ptr->NextReady) { 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tex_read_count = get_tex_read_count(ptr); 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned score = sinst->Score; 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score, 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tex_read_count); 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "\n"); 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void remove_inst_from_list(struct schedule_instruction ** list, 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * inst) 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * prev = NULL; 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * list_ptr; 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (list_ptr = *list; list_ptr; prev = list_ptr, 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org list_ptr = list_ptr->NextReady) { 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (list_ptr == inst) { 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prev) { 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev->NextReady = inst->NextReady; 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *list = inst->NextReady; 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->NextReady = NULL; 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->NextReady = *list; 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *list = inst; 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void add_inst_to_list_score(struct schedule_instruction ** list, 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * inst) 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * temp; 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * prev; 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!*list) { 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *list = inst; 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org temp = *list; 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev = NULL; 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(temp && inst->Score <= temp->Score) { 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev = temp; 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org temp = temp->NextReady; 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prev) { 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->NextReady = temp; 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *list = inst; 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev->NextReady = inst; 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->NextReady = temp; 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i is now ready\n", sinst->Instruction->IP); 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Adding Ready TEX instructions to the end of the "Ready List" helps 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * us emit TEX instructions in blocks without losing our place. */ 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_inst_to_list_score(&s->ReadyTEX, sinst); 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_inst_to_list_score(&s->ReadyRGB, sinst); 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_inst_to_list_score(&s->ReadyAlpha, sinst); 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_inst_to_list_score(&s->ReadyFullALU, sinst); 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sinst->NumDependencies > 0); 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->NumDependencies--; 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!sinst->NumDependencies) 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org instruction_ready(s, sinst); 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* These functions provide different heuristics for scheduling instructions. 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The default is calc_score_readers. */ 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if 0 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void calc_score_zero(struct schedule_instruction * sinst) 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score = 0; 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void calc_score_deps(struct schedule_instruction * sinst) 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int i; 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score = 0; 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < sinst->NumWriteValues; i++) { 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value * v = sinst->WriteValues[i]; 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (v->NumReaders) { 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value_reader * r; 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (r = v->Readers; r; r = r->Next) { 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (r->Reader->NumDependencies == 1) { 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score += 100; 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score += r->Reader->NumDependencies; 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define NO_OUTPUT_SCORE (1 << 24) 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void score_no_output(struct schedule_instruction * sinst) 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL); 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!sinst->Instruction->U.P.RGB.OutputWriteMask && 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !sinst->Instruction->U.P.Alpha.OutputWriteMask) { 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->PairedInst) { 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!sinst->PairedInst->Instruction->U.P. 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org RGB.OutputWriteMask 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && !sinst->PairedInst->Instruction->U.P. 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Alpha.OutputWriteMask) { 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score |= NO_OUTPUT_SCORE; 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score |= NO_OUTPUT_SCORE; 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define PAIRED_SCORE (1 << 16) 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void calc_score_r300(struct schedule_instruction * sinst) 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned src_idx; 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score = 0; 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org score_no_output(sinst); 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->PairedInst) { 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score |= PAIRED_SCORE; 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (src_idx = 0; src_idx < 4; src_idx++) { 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used + 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Instruction->U.P.Alpha.Src[src_idx].Used; 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define NO_READ_TEX_SCORE (1 << 16) 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void calc_score_readers(struct schedule_instruction * sinst) 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score = 0; 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score = sinst->NumReadValues; 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->PairedInst) { 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score += sinst->PairedInst->NumReadValues; 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (get_tex_read_count(sinst) == 0) { 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Score |= NO_READ_TEX_SCORE; 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org score_no_output(sinst); 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This function decreases the dependencies of the next instruction that 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * wants to write to each of sinst's read values. 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void commit_update_reads(struct schedule_state * s, 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * sinst){ 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < sinst->NumReadValues; ++i) { 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value * v = sinst->ReadValues[i]; 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(v->NumReaders > 0); 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org v->NumReaders--; 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!v->NumReaders) { 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (v->Next) { 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org decrease_dependencies(s, v->Next->Writer); 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->PairedInst) { 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org commit_update_reads(s, sinst->PairedInst); 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void commit_update_writes(struct schedule_state * s, 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * sinst){ 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < sinst->NumWriteValues; ++i) { 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value * v = sinst->WriteValues[i]; 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (v->NumReaders) { 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org decrease_dependencies(s, r->Reader); 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* This happens in instruction sequences of the type 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OP r.x, ...; 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OP r.x, r.x, ...; 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * See also the subtlety in how instructions that both 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * read and write the same register are scanned. 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (v->Next) 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org decrease_dependencies(s, v->Next->Writer); 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sinst->PairedInst) { 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org commit_update_writes(s, sinst->PairedInst); 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void notify_sem_wait(struct schedule_state *s) 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_list * pend_ptr; 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) { 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_list * read_ptr; 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * pending = pend_ptr->Item; 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (read_ptr = pending->TexReaders; read_ptr; 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org read_ptr = read_ptr->Next) { 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * reader = read_ptr->Item; 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader->TexReadCount--; 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->PendingTEX = NULL; 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score); 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org commit_update_reads(s, sinst); 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org commit_update_writes(s, sinst); 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (get_tex_read_count(sinst) > 0) { 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sinst->Instruction->U.P.SemWait = 1; 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org notify_sem_wait(s); 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Emit all ready texture instructions in a single block. 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Emit as a single block to (hopefully) sample many textures in parallel, 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and to avoid hardware indirections on R300. 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *readytex; 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst_begin; 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(s->ReadyTEX); 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org notify_sem_wait(s); 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Node marker for R300 */ 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_begin = rc_insert_new_instruction(s->C, before->Prev); 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Link texture instructions back in */ 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org readytex = s->ReadyTEX; 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(readytex) { 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_insert_instruction(before->Prev, readytex->Instruction); 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i: commit TEX reads\n", readytex->Instruction->IP); 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* All of the TEX instructions in the same TEX block have 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * their source registers read from before any of the 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instructions in that block write to their destination 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers. This means that when we commit a TEX 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instruction, any other TEX instruction that wants to write 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to one of the committed instruction's source register can be 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * marked as ready and should be emitted in the same TEX 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * block. This prevents the following sequence from being 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * emitted in two different TEX blocks: 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org commit_update_reads(s, readytex); 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org readytex = readytex->NextReady; 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org readytex = s->ReadyTEX; 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->ReadyTEX = 0; 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(readytex){ 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i: commit TEX writes\n", readytex->Instruction->IP); 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org commit_update_writes(s, readytex); 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Set semaphore bits for last TEX instruction in the block */ 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!readytex->NextReady) { 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org readytex->Instruction->U.I.TexSemAcquire = 1; 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org readytex->Instruction->U.I.TexSemWait = 1; 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org readytex = readytex->NextReady; 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* This is a helper function for destructive_merge_instructions(). It helps 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * merge presubtract sources from two instructions and makes sure the 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * presubtract sources end up in the correct spot. This function assumes that 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * but no scalar instruction (alpha). 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return 0 if merging the presubtract sources fails. 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @retrun 1 if merging the presubtract sources succeeds. 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int merge_presub_sources( 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction * dst_full, 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_sub_instruction src, 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int type) 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_sub_instruction * dst_sub; 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * info; 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(type) { 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_SOURCE_RGB: 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_rgb = 1; 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_alpha = 0; 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_sub = &dst_full->RGB; 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_SOURCE_ALPHA: 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_rgb = 0; 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_alpha = 1; 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_sub = &dst_full->Alpha; 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org info = rc_get_opcode_info(dst_full->RGB.Opcode); 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org srcp_regs = rc_presubtract_src_reg_count( 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src.Src[RC_PAIR_PRESUB_SRC].Index); 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int arg; 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int free_source; 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int one_way = 0; 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction_source srcp = src.Src[srcp_src]; 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction_source temp; 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org srcp.File, srcp.Index); 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If free_source < 0 then there are no free source 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * slots. */ 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (free_source < 0) 549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org temp = dst_sub->Src[srcp_src]; 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* srcp needs src0 and src1 to be the same */ 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (free_source < srcp_src) { 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!temp.Used) 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org free_source = rc_pair_alloc_source(dst_full, is_rgb, 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_alpha, temp.File, temp.Index); 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (free_source < 0) 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org one_way = 1; 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_sub->Src[free_source] = temp; 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If free_source == srcp_src, then the presubtract 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * source is already in the correct place. */ 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (free_source == srcp_src) 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Shuffle the sources, so we can put the 573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * presubtract source in the correct place. */ 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(arg = 0; arg < info->NumSrcRegs; arg++) { 575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /*If this arg does not read from an rgb source, 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * do nothing. */ 577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org & type)) { 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dst_full->RGB.Arg[arg].Source == srcp_src) 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_full->RGB.Arg[arg].Source = free_source; 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We need to do this just in case register 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * is one of the sources already, but in the 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * wrong spot. */ 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if(dst_full->RGB.Arg[arg].Source == free_source 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && !one_way) { 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_full->RGB.Arg[arg].Source = srcp_src; 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* This function assumes that rgb.Alpha and alpha.RGB are unused */ 598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int destructive_merge_instructions( 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction * rgb, 600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction * alpha) 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * opcode; 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(alpha->RGB.Opcode == RC_OPCODE_NOP); 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Presubtract registers need to be merged first so that registers 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * needed by the presubtract operation can be placed in src0 and/or 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * src1. */ 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Merge the rgb presubtract registers. */ 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Merge the alpha presubtract registers */ 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Copy alpha args into rgb */ 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode = rc_get_opcode_info(alpha->Alpha.Opcode); 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int srcrgb = 0; 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int srcalpha = 0; 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file = 0; 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int index = 0; 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int source; 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org srcrgb = 1; 637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org file = alpha->RGB.Src[oldsrc].File; 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org index = alpha->RGB.Src[oldsrc].Index; 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org srcalpha = 1; 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org file = alpha->Alpha.Src[oldsrc].File; 642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org index = alpha->Alpha.Src[oldsrc].Index; 643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); 646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (source < 0) 647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.Arg[arg].Source = source; 650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; 651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; 652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; 653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Copy alpha opcode into rgb */ 656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.Opcode = alpha->Alpha.Opcode; 657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; 658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; 659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; 660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; 661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.Saturate = alpha->Alpha.Saturate; 662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->Alpha.Omod = alpha->Alpha.Omod; 663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Merge ALU result writing */ 665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (alpha->WriteALUResult) { 666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rgb->WriteALUResult) 667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->WriteALUResult = alpha->WriteALUResult; 670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->ALUResultCompare = alpha->ALUResultCompare; 671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Copy SemWait */ 674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb->SemWait |= alpha->SemWait; 675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Try to merge the given instructions into the rgb instructions. 681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Return true on success; on failure, return false, and keep 683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the instructions untouched. 684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) 686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction backup; 688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /*Instructions can't write output registers and ALU result at the 690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * same time. */ 691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) 692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { 693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Writing output registers in the middle of shaders is slow, so 697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * we don't want to pair output writes with temp writes. */ 698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) 699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) { 700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); 704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (destructive_merge_instructions(rgb, alpha)) 706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); 709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void presub_nop(struct rc_instruction * emitted) { 713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int prev_rgb_index, prev_alpha_index, i, num_src; 714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We don't need a nop if the previous instruction is a TEX. */ 716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { 717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (emitted->Prev->U.P.RGB.WriteMask) 720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; 721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev_rgb_index = -1; 723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (emitted->Prev->U.P.Alpha.WriteMask) 724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; 725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev_alpha_index = 1; 727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Check the previous rgb instruction */ 729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { 730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org num_src = rc_presubtract_src_reg_count( 731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); 732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < num_src; i++) { 733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int index = emitted->U.P.RGB.Src[i].Index; 734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY 735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && (index == prev_rgb_index 736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || index == prev_alpha_index)) { 737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emitted->Prev->U.P.Nop = 1; 738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Check the previous alpha instruction. */ 744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) 745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org num_src = rc_presubtract_src_reg_count( 748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); 749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < num_src; i++) { 750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int index = emitted->U.P.Alpha.Src[i].Index; 751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY 752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && (index == prev_rgb_index || index == prev_alpha_index)) { 753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emitted->Prev->U.P.Nop = 1; 754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void rgb_to_alpha_remap ( 760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction_arg * arg, 762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file old_file, 763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle old_swz, 764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int new_index) 765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int new_src_index; 767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 3; i++) { 770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (get_swz(arg->Swizzle, i) == old_swz) { 771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); 772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, 775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org old_file, new_index); 776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* This conversion is not possible, we must have made a mistake in 777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * is_rgb_to_alpha_possible. */ 778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (new_src_index < 0) { 779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg->Source = new_src_index; 784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int can_remap(unsigned int opcode) 787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(opcode) { 789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DDX: 790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DDY: 791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int can_convert_opcode_to_alpha(unsigned int opcode) 798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(opcode) { 800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DDX: 801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DDY: 802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DP2: 803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DP3: 804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DP4: 805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_OPCODE_DPH: 806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void is_rgb_to_alpha_possible( 813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void * userdata, 814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction_arg * arg, 816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction_source * src) 817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int read_chan = RC_SWIZZLE_UNUSED; 819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int alpha_sources = 0; 820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data * reader_data = userdata; 822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!can_remap(inst->U.P.RGB.Opcode) 824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || !can_remap(inst->U.P.Alpha.Opcode)) { 825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!src) 830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX There are some cases where we can still do the conversion if 833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a reader reads from a presubtract source, but for now we'll prevent 834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * it. */ 835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (arg->Source == RC_PAIR_PRESUB_SRC) { 836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Make sure the source only reads the register component that we 841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * are going to be convering from. It is OK if the instruction uses 842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * this component more than once. 843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * XXX If the index we will be converting to is the same as the 844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * current index, then it is OK to read from more than one component. 845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 3; i++) { 847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_swizzle swz = get_swz(arg->Swizzle, i); 848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(swz) { 849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_SWIZZLE_X: 850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_SWIZZLE_Y: 851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_SWIZZLE_Z: 852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case RC_SWIZZLE_W: 853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (read_chan == RC_SWIZZLE_UNUSED) { 854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org read_chan = swz; 855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (read_chan != swz) { 856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Make sure there are enough alpha sources. 866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * XXX If we know what register all the readers are going 867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to be remapped to, then in some situations we can still do 868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the subsitution, even if all 3 alpha sources are being used.*/ 869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 3; i++) { 870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->U.P.Alpha.Src[i].Used) { 871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org alpha_sources++; 872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (alpha_sources > 2) { 875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int convert_rgb_to_alpha( 881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state * s, 882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * sched_inst) 883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; 885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int old_mask = pair_inst->RGB.WriteMask; 886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int old_swz = rc_mask_to_swizzle(old_mask); 887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * info = 888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_get_opcode_info(pair_inst->RGB.Opcode); 889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int new_index = -1; 890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sched_inst->GlobalReaders.Abort) 893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!pair_inst->RGB.WriteMask) 896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) 899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { 900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sched_inst->NumWriteValues == 1); 904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!sched_inst->WriteValues[0]) { 906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We start at the old index, because if we can reuse the same 911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register and just change the swizzle then it is more likely we 912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * will be able to convert all the readers. */ 913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { 914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value ** new_regvalp = get_reg_valuep( 915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s, RC_FILE_TEMPORARY, i, 3); 916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!*new_regvalp) { 917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value ** old_regvalp = 918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org get_reg_valuep(s, 919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org RC_FILE_TEMPORARY, 920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.DestIndex, 921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_mask_to_swizzle(old_mask)); 922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org new_index = i; 923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *new_regvalp = *old_regvalp; 924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *old_regvalp = NULL; 925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); 926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (new_index < 0) { 930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA 934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * as the RGB opcode, then the Alpha instruction will already contain 935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the correct opcode and instruction args, so we do not want to 936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * overwrite them. 937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) { 939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; 940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, 941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeof(pair_inst->Alpha.Arg)); 942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.DestIndex = new_index; 944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.WriteMask = RC_MASK_W; 945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.Target = pair_inst->RGB.Target; 946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; 947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; 948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; 949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.Omod = pair_inst->RGB.Omod; 950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Move the swizzles into the first chan */ 951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < info->NumSrcRegs; i++) { 952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int j; 953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 0; j < 3; j++) { 954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); 955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swz != RC_SWIZZLE_UNUSED) { 956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->Alpha.Arg[i].Swizzle = 957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_init_swizzle(swz, 1); 958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.Opcode = RC_OPCODE_NOP; 963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.DestIndex = 0; 964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.WriteMask = 0; 965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.Target = 0; 966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.OutputWriteMask = 0; 967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.DepthWriteMask = 0; 968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_inst->RGB.Saturate = 0; 969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); 970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { 972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; 973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, 974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org RC_FILE_TEMPORARY, old_swz, new_index); 975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void try_convert_and_pair( 980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state *s, 981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction ** inst_list) 982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * list_ptr = *inst_list; 984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while (list_ptr && *inst_list && (*inst_list)->NextReady) { 985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int paired = 0; 986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP 987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && list_ptr->Instruction->U.P.RGB.Opcode 988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org != RC_OPCODE_REPL_ALPHA) { 989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org goto next; 990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (list_ptr->NumWriteValues == 1 992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && convert_rgb_to_alpha(s, list_ptr)) { 993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * pair_ptr; 995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org remove_inst_from_list(inst_list, list_ptr); 996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_inst_to_list_score(&s->ReadyAlpha, list_ptr); 997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (pair_ptr = s->ReadyRGB; pair_ptr; 999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_ptr = pair_ptr->NextReady) { 1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (merge_instructions(&pair_ptr->Instruction->U.P, 1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &list_ptr->Instruction->U.P)) { 1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org remove_inst_from_list(&s->ReadyAlpha, list_ptr); 1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org remove_inst_from_list(&s->ReadyRGB, pair_ptr); 1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_ptr->PairedInst = list_ptr; 1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_inst_to_list(&s->ReadyFullALU, pair_ptr); 1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org list_ptr = *inst_list; 1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org paired = 1; 1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!paired) { 1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnext: 1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org list_ptr = list_ptr->NextReady; 1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This function attempts to merge RGB and Alpha instructions together. 1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void pair_instructions(struct schedule_state * s) 1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *rgb_ptr; 1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *alpha_ptr; 1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Some pairings might fail because they require too 1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * many source slots; try all possible pairings if necessary */ 1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb_ptr = s->ReadyRGB; 1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(rgb_ptr) { 1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * rgb_next = rgb_ptr->NextReady; 1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org alpha_ptr = s->ReadyAlpha; 1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(alpha_ptr) { 1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * alpha_next = alpha_ptr->NextReady; 1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) { 1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Remove RGB and Alpha from their ready lists. 1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org remove_inst_from_list(&s->ReadyRGB, rgb_ptr); 1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org remove_inst_from_list(&s->ReadyAlpha, alpha_ptr); 1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb_ptr->PairedInst = alpha_ptr; 1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_inst_to_list(&s->ReadyFullALU, rgb_ptr); 1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org alpha_ptr = alpha_next; 1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rgb_ptr = rgb_next; 1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!s->Opt) { 1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB 1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * slot can be converted into Alpha instructions. */ 1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org try_convert_and_pair(s, &s->ReadyFullALU); 1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Try to convert some of the RGB instructions to Alpha and 1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * try to pair it with another RGB. */ 1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org try_convert_and_pair(s, &s->ReadyRGB); 1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void update_max_score( 1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state * s, 1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction ** list, 1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int * max_score, 1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction ** max_inst_out, 1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction *** list_out) 1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * list_ptr; 1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) { 1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int score; 1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->CalcScore(list_ptr); 1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org score = list_ptr->Score; 1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!*max_inst_out || score > *max_score) { 1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *max_score = score; 1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *max_inst_out = list_ptr; 1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *list_out = list; 1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void emit_instruction( 1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state * s, 1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * before) 1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int max_score = -1; 1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * max_inst = NULL; 1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction ** max_list = NULL; 1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tex_count = 0; 1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * tex_ptr; 1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pair_instructions(s); 1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if VERBOSE 1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "Full:\n"); 1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org print_list(s->ReadyFullALU); 1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "RGB:\n"); 1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org print_list(s->ReadyRGB); 1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "Alpha:\n"); 1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org print_list(s->ReadyAlpha); 1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "TEX:\n"); 1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org print_list(s->ReadyTEX); 1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { 1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) { 1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_all_tex(s, before); 1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tex_count++; 1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list); 1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list); 1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list); 1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (tex_count >= s->max_tex_group || max_score == -1 1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || (s->TEXCount > 0 && tex_count == s->TEXCount) 1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org || (!s->C->is_r500 && tex_count > 0 && max_score == -1)) { 1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_all_tex(s, before); 1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org remove_inst_from_list(max_list, max_inst); 1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_insert_instruction(before->Prev, max_inst->Instruction); 1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org commit_alu_instruction(s, max_inst); 1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org presub_nop(before->Prev); 1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void add_tex_reader( 1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state * s, 1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * writer, 1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_instruction * reader) 1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) { 1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /*Not a TEX instructions */ 1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader->TexReadCount++; 1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader)); 1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void scan_read(void * data, struct rc_instruction * inst, 1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, unsigned int index, unsigned int chan) 1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state * s = data; 1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value ** v = get_reg_valuep(s, file, index, chan); 1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value_reader * reader; 1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!v) 1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (*v && (*v)->Writer == s->Current) { 1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The instruction reads and writes to a register component. 1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * In this case, we only want to increment dependencies by one. 1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Why? 1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Because each instruction depends on the writers of its source 1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers _and_ the most recent writer of its destination 1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register. In this case, the current instruction (s->Current) 1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * has a dependency that both writes to one of its source 1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers and was the most recent writer to its destination 1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register. We have already marked this dependency in 1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * scan_write(), so we don't need to do it again. 1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We need to make sure we are adding s->Current to the 1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * previous writer's list of TexReaders, if the previous writer 1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * was a TEX instruction. 1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_tex_reader(s, s->PrevWriter[chan], s->Current); 1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); 1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); 1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader->Reader = s->Current; 1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!*v) { 1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* In this situation, the instruction reads from a register 1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * that hasn't been written to or read from in the current 1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * block. */ 1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); 1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(*v, 0, sizeof(struct reg_value)); 1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (*v)->Readers = reader; 1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader->Next = (*v)->Readers; 1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (*v)->Readers = reader; 1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Only update the current instruction's dependencies if the 1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register it reads from has been written to in this block. */ 1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((*v)->Writer) { 1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add_tex_reader(s, (*v)->Writer, s->Current); 1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current->NumDependencies++; 1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (*v)->NumReaders++; 1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s->Current->NumReadValues >= 12) { 1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); 1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current->ReadValues[s->Current->NumReadValues++] = *v; 1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void scan_write(void * data, struct rc_instruction * inst, 1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_register_file file, unsigned int index, unsigned int chan) 1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state * s = data; 1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value ** pv = get_reg_valuep(s, file, index, chan); 1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct reg_value * newv; 1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!pv) 1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); 1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); 1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(newv, 0, sizeof(*newv)); 1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org newv->Writer = s->Current; 1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (*pv) { 1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (*pv)->Next = newv; 1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current->NumDependencies++; 1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Keep track of the previous writer to s->Current's destination 1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register */ 1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->PrevWriter[chan] = (*pv)->Writer; 1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *pv = newv; 1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s->Current->NumWriteValues >= 4) { 1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); 1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current->WriteValues[s->Current->NumWriteValues++] = newv; 1238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void is_rgb_to_alpha_possible_normal( 1242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void * userdata, 1243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst, 1244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_src_register * src) 1245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_reader_data * reader_data = userdata; 1247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reader_data->Abort = 1; 1248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void schedule_block(struct schedule_state * s, 1252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * begin, struct rc_instruction * end) 1253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int ip; 1255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Scan instructions for data dependencies */ 1257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ip = 0; 1258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { 1259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); 1260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(s->Current, 0, sizeof(struct schedule_instruction)); 1261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->Type == RC_INSTRUCTION_NORMAL) { 1263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * info = 1264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_get_opcode_info(inst->U.I.Opcode); 1265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (info->HasTexture) { 1266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->TEXCount++; 1267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: This causes SemWait to be set for all instructions in 1271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a block if the previous block contained a TEX instruction. 1272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * We can do better here, but it will take a lot of work. */ 1273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s->PrevBlockHasTex) { 1274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current->TexReadCount = 1; 1275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current->Instruction = inst; 1278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->IP = ip++; 1279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i: Scanning\n", inst->IP); 1281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The order of things here is subtle and maybe slightly 1283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * counter-intuitive, to account for the case where an 1284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instruction writes to the same register as it reads 1285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * from. */ 1286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_for_all_writes_chan(inst, &scan_write, s); 1287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_for_all_reads_chan(inst, &scan_read, s); 1288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); 1290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!s->Current->NumDependencies) { 1292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org instruction_ready(s, s->Current); 1293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Get global readers for possible RGB->Alpha conversion. */ 1296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s->Current->GlobalReaders.ExitOnAbort = 1; 1297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rc_get_readers(s->C, inst, &s->Current->GlobalReaders, 1298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_rgb_to_alpha_possible_normal, 1299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org is_rgb_to_alpha_possible, NULL); 1300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Temporarily unlink all instructions */ 1303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org begin->Prev->Next = end; 1304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org end->Prev = begin->Prev; 1305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Schedule instructions back */ 1307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(!s->C->Error && 1308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { 1309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_instruction(s, end); 1310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic int is_controlflow(struct rc_instruction * inst) 1314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->Type == RC_INSTRUCTION_NORMAL) { 1316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 1317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return opcode->IsFlowControl; 1318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 1320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid rc_pair_schedule(struct radeon_compiler *cc, void *user) 1323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; 1325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct schedule_state s; 1326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * inst = c->Base.Program.Instructions.Next; 1327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int * opt = user; 1328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(&s, 0, sizeof(s)); 1330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s.Opt = *opt; 1331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s.C = &c->Base; 1332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s.C->is_r500) { 1333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s.CalcScore = calc_score_readers; 1334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s.CalcScore = calc_score_r300; 1336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); 1338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(inst != &c->Base.Program.Instructions) { 1339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct rc_instruction * first; 1340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_controlflow(inst)) { 1342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = inst->Next; 1343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org first = inst; 1347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) 1349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = inst->Next; 1350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DBG("Schedule one block\n"); 1352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(s.Temporary, 0, sizeof(s.Temporary)); 1353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s.TEXCount = 0; 1354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org schedule_block(&s, first, inst); 1355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s.PendingTEX) { 1356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s.PrevBlockHasTex = 1; 1357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1360