1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2011 Intel Corporation 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"), 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions: 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software. 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN THE SOFTWARE. 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgextern "C" { 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/macros.h" 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "program/register_allocate.h" 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} /* extern "C" */ 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_vec4.h" 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_print_visitor.h" 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace brw; 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace brw { 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgassign(unsigned int *reg_hw_locations, reg *reg) 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (reg->file == GRF) { 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg->reg = reg_hw_locations[reg->reg]; 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::reg_allocate_trivial() 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int hw_reg_mapping[this->virtual_grf_count]; 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool virtual_grf_used[this->virtual_grf_count]; 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int i; 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int next; 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Calculate which virtual GRFs are actually in use after whatever 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * optimization passes have occurred. 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < this->virtual_grf_count; i++) { 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_used[i] = false; 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_iter(exec_list_iterator, iter, this->instructions) { 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vec4_instruction *inst = (vec4_instruction *)iter.get(); 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF) 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_used[inst->dst.reg] = true; 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == GRF) 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_used[inst->src[i].reg] = true; 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[0] = this->first_non_payload_grf; 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 1; i < this->virtual_grf_count; i++) { 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (virtual_grf_used[i]) { 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[i] = next; 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next += this->virtual_grf_sizes[i]; 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prog_data->total_grf = next; 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_iter(exec_list_iterator, iter, this->instructions) { 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vec4_instruction *inst = (vec4_instruction *)iter.get(); 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->dst); 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->src[0]); 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->src[1]); 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->src[2]); 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prog_data->total_grf > max_grf) { 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("Ran out of regs on trivial allocator (%d/%d)\n", 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prog_data->total_grf, max_grf); 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbrw_alloc_reg_set_for_classes(struct brw_context *brw, 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int *class_sizes, 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_count, 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int base_reg_count) 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Compute the total number of registers across all classes. */ 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int ra_reg_count = 0; 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < class_count; i++) { 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_reg_count += base_reg_count - (class_sizes[i] - 1); 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(brw->vs.ra_reg_to_grf); 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(brw->vs.regs); 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count); 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(brw->vs.classes); 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->vs.classes = ralloc_array(brw, int, class_count + 1); 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Now, add the registers to their classes, and add the conflicts 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * between them and the base GRF registers (and also each other). 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg = 0; 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < class_count; i++) { 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_reg_count = base_reg_count - (class_sizes[i] - 1); 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs); 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int j = 0; j < class_reg_count; j++) { 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg); 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->vs.ra_reg_to_grf[reg] = j; 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int base_reg = j; 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_reg < j + class_sizes[i]; 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_reg++) { 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg); 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg++; 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(reg == ra_reg_count); 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_set_finalize(brw->vs.regs); 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::reg_allocate() 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int hw_reg_mapping[virtual_grf_count]; 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int first_assigned_grf = this->first_non_payload_grf; 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int base_reg_count = max_grf - first_assigned_grf; 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_sizes[base_reg_count]; 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_count = 0; 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Using the trivial allocator can be useful in debugging undefined 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register access as a result of broken optimization passes. 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (0) 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return reg_allocate_trivial(); 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org calculate_live_intervals(); 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Set up the register classes. 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The base registers store a vec4. However, we'll need larger 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * storage for arrays, structures, and matrices, which will be sets 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of contiguous registers. 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org class_sizes[class_count++] = 1; 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int r = 0; r < virtual_grf_count; r++) { 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int i; 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < class_count; i++) { 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (class_sizes[i] == this->virtual_grf_sizes[r]) 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i == class_count) { 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->virtual_grf_sizes[r] >= base_reg_count) { 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("Object too large to register allocate.\n"); 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org class_sizes[class_count++] = this->virtual_grf_sizes[r]; 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count); 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs, 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_count); 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < virtual_grf_count; i++) { 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int c = 0; c < class_count; c++) { 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (class_sizes[c] == this->virtual_grf_sizes[i]) { 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_set_node_class(g, i, brw->vs.classes[c]); 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int j = 0; j < i; j++) { 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (virtual_grf_interferes(i, j)) { 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_add_node_interference(g, i, j); 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ra_allocate_no_spills(g)) { 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Failed to allocate registers. Spill a reg, and the caller will 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * loop back into here to try again. 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg = choose_spill_reg(g); 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (reg == -1) { 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("no register to spill\n"); 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_reg(reg); 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(g); 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Get the chosen virtual registers for each node, and map virtual 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * regs in the register classes back down to real hardware reg 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * numbers. 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prog_data->total_grf = first_assigned_grf; 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < virtual_grf_count; i++) { 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg = ra_get_node_reg(g, i); 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg]; 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prog_data->total_grf = MAX2(prog_data->total_grf, 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[i] + virtual_grf_sizes[i]); 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vec4_instruction *inst = (vec4_instruction *)node; 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->dst); 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->src[0]); 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->src[1]); 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign(hw_reg_mapping, &inst->src[2]); 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(g); 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float loop_scale = 1.0; 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < this->virtual_grf_count; i++) { 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_costs[i] = 0.0; 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[i] = virtual_grf_sizes[i] != 1; 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Calculate costs for spilling nodes. Call it a cost of 1 per 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * spill/unspill we'll have to do, and guess that the insides of 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * loops run 10 times. 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vec4_instruction *inst = (vec4_instruction *) node; 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < 3; i++) { 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == GRF) { 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_costs[inst->src[i].reg] += loop_scale; 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].reladdr) 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->src[i].reg] = true; 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF) { 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_costs[inst->dst.reg] += loop_scale; 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.reladdr) 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->dst.reg] = true; 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (inst->opcode) { 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_DO: 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop_scale *= 10; 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_WHILE: 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop_scale /= 10; 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case VS_OPCODE_SCRATCH_READ: 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case VS_OPCODE_SCRATCH_WRITE: 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == GRF) 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->src[i].reg] = true; 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF) 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->dst.reg] = true; 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::choose_spill_reg(struct ra_graph *g) 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float spill_costs[this->virtual_grf_count]; 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool no_spill[this->virtual_grf_count]; 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org evaluate_spill_costs(spill_costs, no_spill); 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < this->virtual_grf_count; i++) { 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!no_spill[i]) 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_set_node_spill_cost(g, i, spill_costs[i]); 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return ra_get_best_spill_node(g); 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::spill_reg(int spill_reg_nr) 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(virtual_grf_sizes[spill_reg_nr] == 1); 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int spill_offset = c->last_scratch++; 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Generate spill/unspill instructions for the objects being spilled. */ 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vec4_instruction *inst = (vec4_instruction *) node; 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < 3; i++) { 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src_reg spill_reg = inst->src[i]; 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg = virtual_grf_alloc(1); 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_reg temp = dst_reg(inst->src[i]); 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Only read the necessary channels, to avoid overwriting the rest 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * with data that may not have been written to scratch. 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org temp.writemask = 0; 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int c = 0; c < 4; c++) 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c)); 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(temp.writemask != 0); 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_scratch_read(inst, temp, spill_reg, spill_offset); 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) { 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_reg spill_reg = inst->dst; 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg = virtual_grf_alloc(1); 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We don't want a swizzle when reading from the source; read the 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * whole register and use spill_reg's writemask to select which 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * channels to write. 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src_reg temp = src_reg(inst->dst); 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org temp.swizzle = BRW_SWIZZLE_XYZW; 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_scratch_write(inst, temp, spill_reg, spill_offset); 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->live_intervals_valid = false; 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} /* namespace brw */ 366