1865f88afc0d59d886fb2ad50429e584ecf17fa81Brian/* 2865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Copyright © 2011 Intel Corporation 3827e72de7537e62cac9652f8b7344ff356de9bb1Brian * 4865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Permission is hereby granted, free of charge, to any person obtaining a 5827e72de7537e62cac9652f8b7344ff356de9bb1Brian * copy of this software and associated documentation files (the "Software"), 6865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * to deal in the Software without restriction, including without limitation 7865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * and/or sell copies of the Software, and to permit persons to whom the 9865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Software is furnished to do so, subject to the following conditions: 10865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * 11865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * The above copyright notice and this permission notice (including the next 12865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * paragraph) shall be included in all copies or substantial portions of the 13865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * Software. 14865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * 15865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * IN THE SOFTWARE. 22865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */ 23865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 24865f88afc0d59d886fb2ad50429e584ecf17fa81Brianextern "C" { 25c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "main/macros.h" 26c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "program/register_allocate.h" 27c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian} /* extern "C" */ 28c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian 29c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "brw_vec4.h" 30865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "glsl/ir_print_visitor.h" 31c968d3d410a1897ecbb41d3557adaef69a4c627aBrian 32865f88afc0d59d886fb2ad50429e584ecf17fa81Brianusing namespace brw; 33865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 34865f88afc0d59d886fb2ad50429e584ecf17fa81Briannamespace brw { 35865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 36865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void 37865f88afc0d59d886fb2ad50429e584ecf17fa81Brianassign(unsigned int *reg_hw_locations, reg *reg) 38865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{ 39865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (reg->file == GRF) { 40865f88afc0d59d886fb2ad50429e584ecf17fa81Brian reg->reg = reg_hw_locations[reg->reg]; 41865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 42865f88afc0d59d886fb2ad50429e584ecf17fa81Brian} 43865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 44827e72de7537e62cac9652f8b7344ff356de9bb1Brianbool 45865f88afc0d59d886fb2ad50429e584ecf17fa81Brianvec4_visitor::reg_allocate_trivial() 46827e72de7537e62cac9652f8b7344ff356de9bb1Brian{ 47827e72de7537e62cac9652f8b7344ff356de9bb1Brian unsigned int hw_reg_mapping[this->virtual_grf_count]; 48865f88afc0d59d886fb2ad50429e584ecf17fa81Brian bool virtual_grf_used[this->virtual_grf_count]; 49999b55663a09d9669a9d14c5aadfa84e6dcba288Brian int i; 50865f88afc0d59d886fb2ad50429e584ecf17fa81Brian int next; 51865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 52865f88afc0d59d886fb2ad50429e584ecf17fa81Brian /* Calculate which virtual GRFs are actually in use after whatever 53865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * optimization passes have occurred. 54865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */ 55865f88afc0d59d886fb2ad50429e584ecf17fa81Brian for (int i = 0; i < this->virtual_grf_count; i++) { 56865f88afc0d59d886fb2ad50429e584ecf17fa81Brian virtual_grf_used[i] = false; 57865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 58865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 59865f88afc0d59d886fb2ad50429e584ecf17fa81Brian foreach_iter(exec_list_iterator, iter, this->instructions) { 60865f88afc0d59d886fb2ad50429e584ecf17fa81Brian vec4_instruction *inst = (vec4_instruction *)iter.get(); 61865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 62865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (inst->dst.file == GRF) 63865f88afc0d59d886fb2ad50429e584ecf17fa81Brian virtual_grf_used[inst->dst.reg] = true; 64865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 65999b55663a09d9669a9d14c5aadfa84e6dcba288Brian for (int i = 0; i < 3; i++) { 66865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (inst->src[i].file == GRF) 67865f88afc0d59d886fb2ad50429e584ecf17fa81Brian virtual_grf_used[inst->src[i].reg] = true; 68865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 69865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 70865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 71865f88afc0d59d886fb2ad50429e584ecf17fa81Brian hw_reg_mapping[0] = this->first_non_payload_grf; 72865f88afc0d59d886fb2ad50429e584ecf17fa81Brian next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; 73865f88afc0d59d886fb2ad50429e584ecf17fa81Brian for (i = 1; i < this->virtual_grf_count; i++) { 74999b55663a09d9669a9d14c5aadfa84e6dcba288Brian if (virtual_grf_used[i]) { 75999b55663a09d9669a9d14c5aadfa84e6dcba288Brian hw_reg_mapping[i] = next; 76999b55663a09d9669a9d14c5aadfa84e6dcba288Brian next += this->virtual_grf_sizes[i]; 77999b55663a09d9669a9d14c5aadfa84e6dcba288Brian } 78999b55663a09d9669a9d14c5aadfa84e6dcba288Brian } 79999b55663a09d9669a9d14c5aadfa84e6dcba288Brian prog_data->total_grf = next; 80999b55663a09d9669a9d14c5aadfa84e6dcba288Brian 81865f88afc0d59d886fb2ad50429e584ecf17fa81Brian foreach_iter(exec_list_iterator, iter, this->instructions) { 82999b55663a09d9669a9d14c5aadfa84e6dcba288Brian vec4_instruction *inst = (vec4_instruction *)iter.get(); 83999b55663a09d9669a9d14c5aadfa84e6dcba288Brian 84999b55663a09d9669a9d14c5aadfa84e6dcba288Brian assign(hw_reg_mapping, &inst->dst); 85865f88afc0d59d886fb2ad50429e584ecf17fa81Brian assign(hw_reg_mapping, &inst->src[0]); 86865f88afc0d59d886fb2ad50429e584ecf17fa81Brian assign(hw_reg_mapping, &inst->src[1]); 87865f88afc0d59d886fb2ad50429e584ecf17fa81Brian assign(hw_reg_mapping, &inst->src[2]); 88865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 89865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 90865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (prog_data->total_grf > max_grf) { 91865f88afc0d59d886fb2ad50429e584ecf17fa81Brian fail("Ran out of regs on trivial allocator (%d/%d)\n", 92865f88afc0d59d886fb2ad50429e584ecf17fa81Brian prog_data->total_grf, max_grf); 93865f88afc0d59d886fb2ad50429e584ecf17fa81Brian return false; 94865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 95865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 96865f88afc0d59d886fb2ad50429e584ecf17fa81Brian return true; 97865f88afc0d59d886fb2ad50429e584ecf17fa81Brian} 98865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 99865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void 100865f88afc0d59d886fb2ad50429e584ecf17fa81Brianbrw_alloc_reg_set_for_classes(struct brw_context *brw, 101865f88afc0d59d886fb2ad50429e584ecf17fa81Brian int *class_sizes, 102865f88afc0d59d886fb2ad50429e584ecf17fa81Brian int class_count, 103f6803de7396edda2223adf7ff7445579dbe475c9Brian int base_reg_count) 104f6803de7396edda2223adf7ff7445579dbe475c9Brian{ 105f6803de7396edda2223adf7ff7445579dbe475c9Brian /* Compute the total number of registers across all classes. */ 106865f88afc0d59d886fb2ad50429e584ecf17fa81Brian int ra_reg_count = 0; 107ced6f76404ff1a6713c85edff17551f82c33cc24Brian for (int i = 0; i < class_count; i++) { 108865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ra_reg_count += base_reg_count - (class_sizes[i] - 1); 109865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 1103ed1acd13c7876288a5d1ab6d288b1654f0c2e6dBrian 111865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ralloc_free(brw->vs.ra_reg_to_grf); 112865f88afc0d59d886fb2ad50429e584ecf17fa81Brian brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); 113f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian ralloc_free(brw->vs.regs); 114f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count); 11510b5895597d5e069183cb647d17eb412effceb4fBrian ralloc_free(brw->vs.classes); 11660d136f63c5a5a18b12952ec8e8532cbce086a4dBrian brw->vs.classes = ralloc_array(brw, int, class_count + 1); 11760d136f63c5a5a18b12952ec8e8532cbce086a4dBrian 11860d136f63c5a5a18b12952ec8e8532cbce086a4dBrian /* Now, add the registers to their classes, and add the conflicts 11960d136f63c5a5a18b12952ec8e8532cbce086a4dBrian * between them and the base GRF registers (and also each other). 120e48f0b09abe42aa3393a492af07e53b76ad0ff3cBrian */ 121af0ae93863b4c876e70efa4e7406f04a3409f135Brian int reg = 0; 122af0ae93863b4c876e70efa4e7406f04a3409f135Brian for (int i = 0; i < class_count; i++) { 12302afd45d3b2eccff5d566cdeb32b3211803bd500Brian int class_reg_count = base_reg_count - (class_sizes[i] - 1); 124af0ae93863b4c876e70efa4e7406f04a3409f135Brian brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs); 12510b5895597d5e069183cb647d17eb412effceb4fBrian 126f6803de7396edda2223adf7ff7445579dbe475c9Brian for (int j = 0; j < class_reg_count; j++) { 127865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg); 128865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 129865f88afc0d59d886fb2ad50429e584ecf17fa81Brian brw->vs.ra_reg_to_grf[reg] = j; 130865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 131865f88afc0d59d886fb2ad50429e584ecf17fa81Brian for (int base_reg = j; 132865f88afc0d59d886fb2ad50429e584ecf17fa81Brian base_reg < j + class_sizes[i]; 133865f88afc0d59d886fb2ad50429e584ecf17fa81Brian base_reg++) { 134865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg); 135865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 136f6803de7396edda2223adf7ff7445579dbe475c9Brian 137f6803de7396edda2223adf7ff7445579dbe475c9Brian reg++; 138f6803de7396edda2223adf7ff7445579dbe475c9Brian } 139865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 140865f88afc0d59d886fb2ad50429e584ecf17fa81Brian assert(reg == ra_reg_count); 141865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 142865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ra_set_finalize(brw->vs.regs); 143865f88afc0d59d886fb2ad50429e584ecf17fa81Brian} 144865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 145865f88afc0d59d886fb2ad50429e584ecf17fa81Brianbool 146865f88afc0d59d886fb2ad50429e584ecf17fa81Brianvec4_visitor::reg_allocate() 147865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{ 1481c09bcfdda4083636a3ac27d804a34ef87875ce7Brian unsigned int hw_reg_mapping[virtual_grf_count]; 149865f88afc0d59d886fb2ad50429e584ecf17fa81Brian int first_assigned_grf = this->first_non_payload_grf; 1501c09bcfdda4083636a3ac27d804a34ef87875ce7Brian int base_reg_count = max_grf - first_assigned_grf; 1518b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian int class_sizes[base_reg_count]; 152865f88afc0d59d886fb2ad50429e584ecf17fa81Brian int class_count = 0; 153865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 154865f88afc0d59d886fb2ad50429e584ecf17fa81Brian /* Using the trivial allocator can be useful in debugging undefined 155865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * register access as a result of broken optimization passes. 1568b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian */ 157865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (0) 1588b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian return reg_allocate_trivial(); 1591c09bcfdda4083636a3ac27d804a34ef87875ce7Brian 160865f88afc0d59d886fb2ad50429e584ecf17fa81Brian calculate_live_intervals(); 1611c09bcfdda4083636a3ac27d804a34ef87875ce7Brian 1621c09bcfdda4083636a3ac27d804a34ef87875ce7Brian /* Set up the register classes. 1638b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian * 1641c09bcfdda4083636a3ac27d804a34ef87875ce7Brian * The base registers store a vec4. However, we'll need larger 1651c09bcfdda4083636a3ac27d804a34ef87875ce7Brian * storage for arrays, structures, and matrices, which will be sets 1661c09bcfdda4083636a3ac27d804a34ef87875ce7Brian * of contiguous registers. 1671c09bcfdda4083636a3ac27d804a34ef87875ce7Brian */ 1681c09bcfdda4083636a3ac27d804a34ef87875ce7Brian class_sizes[class_count++] = 1; 1691c09bcfdda4083636a3ac27d804a34ef87875ce7Brian 170ff73c783cc47361ff0dd819c82d067b4b85870ddBrian for (int r = 0; r < virtual_grf_count; r++) { 171ff73c783cc47361ff0dd819c82d067b4b85870ddBrian int i; 172ff73c783cc47361ff0dd819c82d067b4b85870ddBrian 173ff73c783cc47361ff0dd819c82d067b4b85870ddBrian for (i = 0; i < class_count; i++) { 174ff73c783cc47361ff0dd819c82d067b4b85870ddBrian if (class_sizes[i] == this->virtual_grf_sizes[r]) 1751c09bcfdda4083636a3ac27d804a34ef87875ce7Brian break; 1761c09bcfdda4083636a3ac27d804a34ef87875ce7Brian } 1771c09bcfdda4083636a3ac27d804a34ef87875ce7Brian if (i == class_count) { 178865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (this->virtual_grf_sizes[r] >= base_reg_count) { 179865f88afc0d59d886fb2ad50429e584ecf17fa81Brian fail("Object too large to register allocate.\n"); 1801c09bcfdda4083636a3ac27d804a34ef87875ce7Brian } 1818b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian 182865f88afc0d59d886fb2ad50429e584ecf17fa81Brian class_sizes[class_count++] = this->virtual_grf_sizes[r]; 183865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 184865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 185865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 186865f88afc0d59d886fb2ad50429e584ecf17fa81Brian brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count); 187865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 188865f88afc0d59d886fb2ad50429e584ecf17fa81Brian struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs, 189865f88afc0d59d886fb2ad50429e584ecf17fa81Brian virtual_grf_count); 190865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 191865f88afc0d59d886fb2ad50429e584ecf17fa81Brian for (int i = 0; i < virtual_grf_count; i++) { 192865f88afc0d59d886fb2ad50429e584ecf17fa81Brian for (int c = 0; c < class_count; c++) { 193865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (class_sizes[c] == this->virtual_grf_sizes[i]) { 194865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ra_set_node_class(g, i, brw->vs.classes[c]); 195865f88afc0d59d886fb2ad50429e584ecf17fa81Brian break; 196865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 197865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 198865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 199865f88afc0d59d886fb2ad50429e584ecf17fa81Brian for (int j = 0; j < i; j++) { 200865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (virtual_grf_interferes(i, j)) { 201865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ra_add_node_interference(g, i, j); 202865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 203865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 204865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 205865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 206865f88afc0d59d886fb2ad50429e584ecf17fa81Brian if (!ra_allocate_no_spills(g)) { 207865f88afc0d59d886fb2ad50429e584ecf17fa81Brian /* Failed to allocate registers. Spill a reg, and the caller will 208865f88afc0d59d886fb2ad50429e584ecf17fa81Brian * loop back into here to try again. 209865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */ 21017ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian int reg = choose_spill_reg(g); 21117ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian if (reg == -1) { 21217ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian fail("no register to spill\n"); 213865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } else { 214865f88afc0d59d886fb2ad50429e584ecf17fa81Brian spill_reg(reg); 215865f88afc0d59d886fb2ad50429e584ecf17fa81Brian } 216865f88afc0d59d886fb2ad50429e584ecf17fa81Brian ralloc_free(g); 217865f88afc0d59d886fb2ad50429e584ecf17fa81Brian return false; 21817ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian } 21917ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian 22017ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian /* Get the chosen virtual registers for each node, and map virtual 22117ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian * regs in the register classes back down to real hardware reg 22217ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian * numbers. 223865f88afc0d59d886fb2ad50429e584ecf17fa81Brian */ 224865f88afc0d59d886fb2ad50429e584ecf17fa81Brian prog_data->total_grf = first_assigned_grf; 225865f88afc0d59d886fb2ad50429e584ecf17fa81Brian for (int i = 0; i < virtual_grf_count; i++) { 226865f88afc0d59d886fb2ad50429e584ecf17fa81Brian int reg = ra_get_node_reg(g, i); 227865f88afc0d59d886fb2ad50429e584ecf17fa81Brian 228865f88afc0d59d886fb2ad50429e584ecf17fa81Brian hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg]; 229865f88afc0d59d886fb2ad50429e584ecf17fa81Brian prog_data->total_grf = MAX2(prog_data->total_grf, 230865f88afc0d59d886fb2ad50429e584ecf17fa81Brian hw_reg_mapping[i] + virtual_grf_sizes[i]); 231 } 232 233 foreach_list(node, &this->instructions) { 234 vec4_instruction *inst = (vec4_instruction *)node; 235 236 assign(hw_reg_mapping, &inst->dst); 237 assign(hw_reg_mapping, &inst->src[0]); 238 assign(hw_reg_mapping, &inst->src[1]); 239 assign(hw_reg_mapping, &inst->src[2]); 240 } 241 242 ralloc_free(g); 243 244 return true; 245} 246 247void 248vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) 249{ 250 float loop_scale = 1.0; 251 252 for (int i = 0; i < this->virtual_grf_count; i++) { 253 spill_costs[i] = 0.0; 254 no_spill[i] = virtual_grf_sizes[i] != 1; 255 } 256 257 /* Calculate costs for spilling nodes. Call it a cost of 1 per 258 * spill/unspill we'll have to do, and guess that the insides of 259 * loops run 10 times. 260 */ 261 foreach_list(node, &this->instructions) { 262 vec4_instruction *inst = (vec4_instruction *) node; 263 264 for (unsigned int i = 0; i < 3; i++) { 265 if (inst->src[i].file == GRF) { 266 spill_costs[inst->src[i].reg] += loop_scale; 267 if (inst->src[i].reladdr) 268 no_spill[inst->src[i].reg] = true; 269 } 270 } 271 272 if (inst->dst.file == GRF) { 273 spill_costs[inst->dst.reg] += loop_scale; 274 if (inst->dst.reladdr) 275 no_spill[inst->dst.reg] = true; 276 } 277 278 switch (inst->opcode) { 279 280 case BRW_OPCODE_DO: 281 loop_scale *= 10; 282 break; 283 284 case BRW_OPCODE_WHILE: 285 loop_scale /= 10; 286 break; 287 288 case VS_OPCODE_SCRATCH_READ: 289 case VS_OPCODE_SCRATCH_WRITE: 290 for (int i = 0; i < 3; i++) { 291 if (inst->src[i].file == GRF) 292 no_spill[inst->src[i].reg] = true; 293 } 294 if (inst->dst.file == GRF) 295 no_spill[inst->dst.reg] = true; 296 break; 297 298 default: 299 break; 300 } 301 } 302} 303 304int 305vec4_visitor::choose_spill_reg(struct ra_graph *g) 306{ 307 float spill_costs[this->virtual_grf_count]; 308 bool no_spill[this->virtual_grf_count]; 309 310 evaluate_spill_costs(spill_costs, no_spill); 311 312 for (int i = 0; i < this->virtual_grf_count; i++) { 313 if (!no_spill[i]) 314 ra_set_node_spill_cost(g, i, spill_costs[i]); 315 } 316 317 return ra_get_best_spill_node(g); 318} 319 320void 321vec4_visitor::spill_reg(int spill_reg_nr) 322{ 323 assert(virtual_grf_sizes[spill_reg_nr] == 1); 324 unsigned int spill_offset = c->last_scratch++; 325 326 /* Generate spill/unspill instructions for the objects being spilled. */ 327 foreach_list(node, &this->instructions) { 328 vec4_instruction *inst = (vec4_instruction *) node; 329 330 for (unsigned int i = 0; i < 3; i++) { 331 if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { 332 src_reg spill_reg = inst->src[i]; 333 inst->src[i].reg = virtual_grf_alloc(1); 334 dst_reg temp = dst_reg(inst->src[i]); 335 336 /* Only read the necessary channels, to avoid overwriting the rest 337 * with data that may not have been written to scratch. 338 */ 339 temp.writemask = 0; 340 for (int c = 0; c < 4; c++) 341 temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c)); 342 assert(temp.writemask != 0); 343 344 emit_scratch_read(inst, temp, spill_reg, spill_offset); 345 } 346 } 347 348 if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) { 349 dst_reg spill_reg = inst->dst; 350 inst->dst.reg = virtual_grf_alloc(1); 351 352 /* We don't want a swizzle when reading from the source; read the 353 * whole register and use spill_reg's writemask to select which 354 * channels to write. 355 */ 356 src_reg temp = src_reg(inst->dst); 357 temp.swizzle = BRW_SWIZZLE_XYZW; 358 emit_scratch_write(inst, temp, spill_reg, spill_offset); 359 } 360 } 361 362 this->live_intervals_valid = false; 363} 364 365} /* namespace brw */ 366