/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
24865f88afc0d59d886fb2ad50429e584ecf17fa81Brianextern "C" {
25c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "main/macros.h"
26c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "program/register_allocate.h"
27c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian} /* extern "C" */
28c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian
29c223c6b663cd5db39ba19c2be74b88cc3b8f53f3Brian#include "brw_vec4.h"
30865f88afc0d59d886fb2ad50429e584ecf17fa81Brian#include "glsl/ir_print_visitor.h"
31c968d3d410a1897ecbb41d3557adaef69a4c627aBrian
32865f88afc0d59d886fb2ad50429e584ecf17fa81Brianusing namespace brw;
33865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
34865f88afc0d59d886fb2ad50429e584ecf17fa81Briannamespace brw {
35865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
36865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
37865f88afc0d59d886fb2ad50429e584ecf17fa81Brianassign(unsigned int *reg_hw_locations, reg *reg)
38865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
39865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (reg->file == GRF) {
40865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      reg->reg = reg_hw_locations[reg->reg];
41865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
42865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
43865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
44827e72de7537e62cac9652f8b7344ff356de9bb1Brianbool
45865f88afc0d59d886fb2ad50429e584ecf17fa81Brianvec4_visitor::reg_allocate_trivial()
46827e72de7537e62cac9652f8b7344ff356de9bb1Brian{
47827e72de7537e62cac9652f8b7344ff356de9bb1Brian   unsigned int hw_reg_mapping[this->virtual_grf_count];
48865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   bool virtual_grf_used[this->virtual_grf_count];
49999b55663a09d9669a9d14c5aadfa84e6dcba288Brian   int i;
50865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   int next;
51865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
52865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* Calculate which virtual GRFs are actually in use after whatever
53865f88afc0d59d886fb2ad50429e584ecf17fa81Brian    * optimization passes have occurred.
54865f88afc0d59d886fb2ad50429e584ecf17fa81Brian    */
55865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (int i = 0; i < this->virtual_grf_count; i++) {
56865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      virtual_grf_used[i] = false;
57865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
58865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
59865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   foreach_iter(exec_list_iterator, iter, this->instructions) {
60865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      vec4_instruction *inst = (vec4_instruction *)iter.get();
61865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
62865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      if (inst->dst.file == GRF)
63865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 virtual_grf_used[inst->dst.reg] = true;
64865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
65999b55663a09d9669a9d14c5aadfa84e6dcba288Brian      for (int i = 0; i < 3; i++) {
66865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 if (inst->src[i].file == GRF)
67865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	    virtual_grf_used[inst->src[i].reg] = true;
68865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
69865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
70865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
71865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   hw_reg_mapping[0] = this->first_non_payload_grf;
72865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
73865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (i = 1; i < this->virtual_grf_count; i++) {
74999b55663a09d9669a9d14c5aadfa84e6dcba288Brian      if (virtual_grf_used[i]) {
75999b55663a09d9669a9d14c5aadfa84e6dcba288Brian	 hw_reg_mapping[i] = next;
76999b55663a09d9669a9d14c5aadfa84e6dcba288Brian	 next += this->virtual_grf_sizes[i];
77999b55663a09d9669a9d14c5aadfa84e6dcba288Brian      }
78999b55663a09d9669a9d14c5aadfa84e6dcba288Brian   }
79999b55663a09d9669a9d14c5aadfa84e6dcba288Brian   prog_data->total_grf = next;
80999b55663a09d9669a9d14c5aadfa84e6dcba288Brian
81865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   foreach_iter(exec_list_iterator, iter, this->instructions) {
82999b55663a09d9669a9d14c5aadfa84e6dcba288Brian      vec4_instruction *inst = (vec4_instruction *)iter.get();
83999b55663a09d9669a9d14c5aadfa84e6dcba288Brian
84999b55663a09d9669a9d14c5aadfa84e6dcba288Brian      assign(hw_reg_mapping, &inst->dst);
85865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      assign(hw_reg_mapping, &inst->src[0]);
86865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      assign(hw_reg_mapping, &inst->src[1]);
87865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      assign(hw_reg_mapping, &inst->src[2]);
88865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
89865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
90865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (prog_data->total_grf > max_grf) {
91865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      fail("Ran out of regs on trivial allocator (%d/%d)\n",
92865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	   prog_data->total_grf, max_grf);
93865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return false;
94865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
95865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
96865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   return true;
97865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
98865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
99865f88afc0d59d886fb2ad50429e584ecf17fa81Brianstatic void
100865f88afc0d59d886fb2ad50429e584ecf17fa81Brianbrw_alloc_reg_set_for_classes(struct brw_context *brw,
101865f88afc0d59d886fb2ad50429e584ecf17fa81Brian			      int *class_sizes,
102865f88afc0d59d886fb2ad50429e584ecf17fa81Brian			      int class_count,
103f6803de7396edda2223adf7ff7445579dbe475c9Brian			      int base_reg_count)
104f6803de7396edda2223adf7ff7445579dbe475c9Brian{
105f6803de7396edda2223adf7ff7445579dbe475c9Brian   /* Compute the total number of registers across all classes. */
106865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   int ra_reg_count = 0;
107ced6f76404ff1a6713c85edff17551f82c33cc24Brian   for (int i = 0; i < class_count; i++) {
108865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
109865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
1103ed1acd13c7876288a5d1ab6d288b1654f0c2e6dBrian
111865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ralloc_free(brw->vs.ra_reg_to_grf);
112865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
113f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian   ralloc_free(brw->vs.regs);
114f3e507ef9f75dbfc58ccd07b5fe8cfca10d9a9e3Brian   brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count);
11510b5895597d5e069183cb647d17eb412effceb4fBrian   ralloc_free(brw->vs.classes);
11660d136f63c5a5a18b12952ec8e8532cbce086a4dBrian   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
11760d136f63c5a5a18b12952ec8e8532cbce086a4dBrian
11860d136f63c5a5a18b12952ec8e8532cbce086a4dBrian   /* Now, add the registers to their classes, and add the conflicts
11960d136f63c5a5a18b12952ec8e8532cbce086a4dBrian    * between them and the base GRF registers (and also each other).
120e48f0b09abe42aa3393a492af07e53b76ad0ff3cBrian    */
121af0ae93863b4c876e70efa4e7406f04a3409f135Brian   int reg = 0;
122af0ae93863b4c876e70efa4e7406f04a3409f135Brian   for (int i = 0; i < class_count; i++) {
12302afd45d3b2eccff5d566cdeb32b3211803bd500Brian      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
124af0ae93863b4c876e70efa4e7406f04a3409f135Brian      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
12510b5895597d5e069183cb647d17eb412effceb4fBrian
126f6803de7396edda2223adf7ff7445579dbe475c9Brian      for (int j = 0; j < class_reg_count; j++) {
127865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
128865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
129865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 brw->vs.ra_reg_to_grf[reg] = j;
130865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
131865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 for (int base_reg = j;
132865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	      base_reg < j + class_sizes[i];
133865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	      base_reg++) {
134865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	    ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
135865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 }
136f6803de7396edda2223adf7ff7445579dbe475c9Brian
137f6803de7396edda2223adf7ff7445579dbe475c9Brian	 reg++;
138f6803de7396edda2223adf7ff7445579dbe475c9Brian      }
139865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
140865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   assert(reg == ra_reg_count);
141865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
142865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   ra_set_finalize(brw->vs.regs);
143865f88afc0d59d886fb2ad50429e584ecf17fa81Brian}
144865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
145865f88afc0d59d886fb2ad50429e584ecf17fa81Brianbool
146865f88afc0d59d886fb2ad50429e584ecf17fa81Brianvec4_visitor::reg_allocate()
147865f88afc0d59d886fb2ad50429e584ecf17fa81Brian{
1481c09bcfdda4083636a3ac27d804a34ef87875ce7Brian   unsigned int hw_reg_mapping[virtual_grf_count];
149865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   int first_assigned_grf = this->first_non_payload_grf;
1501c09bcfdda4083636a3ac27d804a34ef87875ce7Brian   int base_reg_count = max_grf - first_assigned_grf;
1518b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian   int class_sizes[base_reg_count];
152865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   int class_count = 0;
153865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
154865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   /* Using the trivial allocator can be useful in debugging undefined
155865f88afc0d59d886fb2ad50429e584ecf17fa81Brian    * register access as a result of broken optimization passes.
1568b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian    */
157865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (0)
1588b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian      return reg_allocate_trivial();
1591c09bcfdda4083636a3ac27d804a34ef87875ce7Brian
160865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   calculate_live_intervals();
1611c09bcfdda4083636a3ac27d804a34ef87875ce7Brian
1621c09bcfdda4083636a3ac27d804a34ef87875ce7Brian   /* Set up the register classes.
1638b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian    *
1641c09bcfdda4083636a3ac27d804a34ef87875ce7Brian    * The base registers store a vec4.  However, we'll need larger
1651c09bcfdda4083636a3ac27d804a34ef87875ce7Brian    * storage for arrays, structures, and matrices, which will be sets
1661c09bcfdda4083636a3ac27d804a34ef87875ce7Brian    * of contiguous registers.
1671c09bcfdda4083636a3ac27d804a34ef87875ce7Brian    */
1681c09bcfdda4083636a3ac27d804a34ef87875ce7Brian   class_sizes[class_count++] = 1;
1691c09bcfdda4083636a3ac27d804a34ef87875ce7Brian
170ff73c783cc47361ff0dd819c82d067b4b85870ddBrian   for (int r = 0; r < virtual_grf_count; r++) {
171ff73c783cc47361ff0dd819c82d067b4b85870ddBrian      int i;
172ff73c783cc47361ff0dd819c82d067b4b85870ddBrian
173ff73c783cc47361ff0dd819c82d067b4b85870ddBrian      for (i = 0; i < class_count; i++) {
174ff73c783cc47361ff0dd819c82d067b4b85870ddBrian	 if (class_sizes[i] == this->virtual_grf_sizes[r])
1751c09bcfdda4083636a3ac27d804a34ef87875ce7Brian	    break;
1761c09bcfdda4083636a3ac27d804a34ef87875ce7Brian      }
1771c09bcfdda4083636a3ac27d804a34ef87875ce7Brian      if (i == class_count) {
178865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
179865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	    fail("Object too large to register allocate.\n");
1801c09bcfdda4083636a3ac27d804a34ef87875ce7Brian	 }
1818b5fce6bcc88cd9dd321f0db95c1714e5e5e85a1Brian
182865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
183865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
184865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
185865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
186865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
187865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
188865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
189865f88afc0d59d886fb2ad50429e584ecf17fa81Brian						    virtual_grf_count);
190865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
191865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (int i = 0; i < virtual_grf_count; i++) {
192865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      for (int c = 0; c < class_count; c++) {
193865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
194865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	    ra_set_node_class(g, i, brw->vs.classes[c]);
195865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	    break;
196865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 }
197865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
198865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
199865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      for (int j = 0; j < i; j++) {
200865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 if (virtual_grf_interferes(i, j)) {
201865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	    ra_add_node_interference(g, i, j);
202865f88afc0d59d886fb2ad50429e584ecf17fa81Brian	 }
203865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
204865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   }
205865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
206865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   if (!ra_allocate_no_spills(g)) {
207865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      /* Failed to allocate registers.  Spill a reg, and the caller will
208865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       * loop back into here to try again.
209865f88afc0d59d886fb2ad50429e584ecf17fa81Brian       */
21017ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian      int reg = choose_spill_reg(g);
21117ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian      if (reg == -1) {
21217ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian         fail("no register to spill\n");
213865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      } else {
214865f88afc0d59d886fb2ad50429e584ecf17fa81Brian         spill_reg(reg);
215865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      }
216865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      ralloc_free(g);
217865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      return false;
21817ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian   }
21917ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian
22017ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian   /* Get the chosen virtual registers for each node, and map virtual
22117ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian    * regs in the register classes back down to real hardware reg
22217ad1d12ebf04ebf4b2b35c1c37d36bb4d2bb550Brian    * numbers.
223865f88afc0d59d886fb2ad50429e584ecf17fa81Brian    */
224865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   prog_data->total_grf = first_assigned_grf;
225865f88afc0d59d886fb2ad50429e584ecf17fa81Brian   for (int i = 0; i < virtual_grf_count; i++) {
226865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      int reg = ra_get_node_reg(g, i);
227865f88afc0d59d886fb2ad50429e584ecf17fa81Brian
228865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
229865f88afc0d59d886fb2ad50429e584ecf17fa81Brian      prog_data->total_grf = MAX2(prog_data->total_grf,
230865f88afc0d59d886fb2ad50429e584ecf17fa81Brian				  hw_reg_mapping[i] + virtual_grf_sizes[i]);
231   }
232
233   foreach_list(node, &this->instructions) {
234      vec4_instruction *inst = (vec4_instruction *)node;
235
236      assign(hw_reg_mapping, &inst->dst);
237      assign(hw_reg_mapping, &inst->src[0]);
238      assign(hw_reg_mapping, &inst->src[1]);
239      assign(hw_reg_mapping, &inst->src[2]);
240   }
241
242   ralloc_free(g);
243
244   return true;
245}
246
247void
248vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
249{
250   float loop_scale = 1.0;
251
252   for (int i = 0; i < this->virtual_grf_count; i++) {
253      spill_costs[i] = 0.0;
254      no_spill[i] = virtual_grf_sizes[i] != 1;
255   }
256
257   /* Calculate costs for spilling nodes.  Call it a cost of 1 per
258    * spill/unspill we'll have to do, and guess that the insides of
259    * loops run 10 times.
260    */
261   foreach_list(node, &this->instructions) {
262      vec4_instruction *inst = (vec4_instruction *) node;
263
264      for (unsigned int i = 0; i < 3; i++) {
265	 if (inst->src[i].file == GRF) {
266	    spill_costs[inst->src[i].reg] += loop_scale;
267            if (inst->src[i].reladdr)
268               no_spill[inst->src[i].reg] = true;
269	 }
270      }
271
272      if (inst->dst.file == GRF) {
273	 spill_costs[inst->dst.reg] += loop_scale;
274         if (inst->dst.reladdr)
275            no_spill[inst->dst.reg] = true;
276      }
277
278      switch (inst->opcode) {
279
280      case BRW_OPCODE_DO:
281	 loop_scale *= 10;
282	 break;
283
284      case BRW_OPCODE_WHILE:
285	 loop_scale /= 10;
286	 break;
287
288      case VS_OPCODE_SCRATCH_READ:
289      case VS_OPCODE_SCRATCH_WRITE:
290         for (int i = 0; i < 3; i++) {
291            if (inst->src[i].file == GRF)
292               no_spill[inst->src[i].reg] = true;
293         }
294	 if (inst->dst.file == GRF)
295	    no_spill[inst->dst.reg] = true;
296	 break;
297
298      default:
299	 break;
300      }
301   }
302}
303
304int
305vec4_visitor::choose_spill_reg(struct ra_graph *g)
306{
307   float spill_costs[this->virtual_grf_count];
308   bool no_spill[this->virtual_grf_count];
309
310   evaluate_spill_costs(spill_costs, no_spill);
311
312   for (int i = 0; i < this->virtual_grf_count; i++) {
313      if (!no_spill[i])
314         ra_set_node_spill_cost(g, i, spill_costs[i]);
315   }
316
317   return ra_get_best_spill_node(g);
318}
319
320void
321vec4_visitor::spill_reg(int spill_reg_nr)
322{
323   assert(virtual_grf_sizes[spill_reg_nr] == 1);
324   unsigned int spill_offset = c->last_scratch++;
325
326   /* Generate spill/unspill instructions for the objects being spilled. */
327   foreach_list(node, &this->instructions) {
328      vec4_instruction *inst = (vec4_instruction *) node;
329
330      for (unsigned int i = 0; i < 3; i++) {
331         if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
332            src_reg spill_reg = inst->src[i];
333            inst->src[i].reg = virtual_grf_alloc(1);
334            dst_reg temp = dst_reg(inst->src[i]);
335
336            /* Only read the necessary channels, to avoid overwriting the rest
337             * with data that may not have been written to scratch.
338             */
339            temp.writemask = 0;
340            for (int c = 0; c < 4; c++)
341               temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c));
342            assert(temp.writemask != 0);
343
344            emit_scratch_read(inst, temp, spill_reg, spill_offset);
345         }
346      }
347
348      if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
349         dst_reg spill_reg = inst->dst;
350         inst->dst.reg = virtual_grf_alloc(1);
351
352         /* We don't want a swizzle when reading from the source; read the
353          * whole register and use spill_reg's writemask to select which
354          * channels to write.
355          */
356         src_reg temp = src_reg(inst->dst);
357         temp.swizzle = BRW_SWIZZLE_XYZW;
358         emit_scratch_write(inst, temp, spill_reg, spill_offset);
359      }
360   }
361
362   this->live_intervals_valid = false;
363}
364
365} /* namespace brw */
366