1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/*
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2011 Intel Corporation
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"),
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions:
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software.
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN THE SOFTWARE.
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgextern "C" {
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/macros.h"
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "program/register_allocate.h"
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} /* extern "C" */
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_vec4.h"
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_print_visitor.h"
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace brw;
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace brw {
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgassign(unsigned int *reg_hw_locations, reg *reg)
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (reg->file == GRF) {
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      reg->reg = reg_hw_locations[reg->reg];
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::reg_allocate_trivial()
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int hw_reg_mapping[this->virtual_grf_count];
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool virtual_grf_used[this->virtual_grf_count];
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int i;
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int next;
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Calculate which virtual GRFs are actually in use after whatever
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * optimization passes have occurred.
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < this->virtual_grf_count; i++) {
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      virtual_grf_used[i] = false;
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_iter(exec_list_iterator, iter, this->instructions) {
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      vec4_instruction *inst = (vec4_instruction *)iter.get();
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (inst->dst.file == GRF)
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 virtual_grf_used[inst->dst.reg] = true;
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int i = 0; i < 3; i++) {
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (inst->src[i].file == GRF)
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    virtual_grf_used[inst->src[i].reg] = true;
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   hw_reg_mapping[0] = this->first_non_payload_grf;
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 1; i < this->virtual_grf_count; i++) {
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (virtual_grf_used[i]) {
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 hw_reg_mapping[i] = next;
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 next += this->virtual_grf_sizes[i];
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   prog_data->total_grf = next;
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_iter(exec_list_iterator, iter, this->instructions) {
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      vec4_instruction *inst = (vec4_instruction *)iter.get();
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->dst);
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->src[0]);
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->src[1]);
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->src[2]);
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (prog_data->total_grf > max_grf) {
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fail("Ran out of regs on trivial allocator (%d/%d)\n",
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	   prog_data->total_grf, max_grf);
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbrw_alloc_reg_set_for_classes(struct brw_context *brw,
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			      int *class_sizes,
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			      int class_count,
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			      int base_reg_count)
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Compute the total number of registers across all classes. */
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int ra_reg_count = 0;
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < class_count; i++) {
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ralloc_free(brw->vs.ra_reg_to_grf);
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ralloc_free(brw->vs.regs);
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count);
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ralloc_free(brw->vs.classes);
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Now, add the registers to their classes, and add the conflicts
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * between them and the base GRF registers (and also each other).
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int reg = 0;
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < class_count; i++) {
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int j = 0; j < class_reg_count; j++) {
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 brw->vs.ra_reg_to_grf[reg] = j;
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 for (int base_reg = j;
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	      base_reg < j + class_sizes[i];
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	      base_reg++) {
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 reg++;
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(reg == ra_reg_count);
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ra_set_finalize(brw->vs.regs);
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::reg_allocate()
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int hw_reg_mapping[virtual_grf_count];
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int first_assigned_grf = this->first_non_payload_grf;
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int base_reg_count = max_grf - first_assigned_grf;
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int class_sizes[base_reg_count];
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int class_count = 0;
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Using the trivial allocator can be useful in debugging undefined
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * register access as a result of broken optimization passes.
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (0)
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return reg_allocate_trivial();
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   calculate_live_intervals();
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Set up the register classes.
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * The base registers store a vec4.  However, we'll need larger
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * storage for arrays, structures, and matrices, which will be sets
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * of contiguous registers.
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class_sizes[class_count++] = 1;
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int r = 0; r < virtual_grf_count; r++) {
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int i;
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (i = 0; i < class_count; i++) {
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (class_sizes[i] == this->virtual_grf_sizes[r])
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    break;
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (i == class_count) {
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    fail("Object too large to register allocate.\n");
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						    virtual_grf_count);
190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < virtual_grf_count; i++) {
192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int c = 0; c < class_count; c++) {
193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    ra_set_node_class(g, i, brw->vs.classes[c]);
195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    break;
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (int j = 0; j < i; j++) {
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (virtual_grf_interferes(i, j)) {
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    ra_add_node_interference(g, i, j);
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ra_allocate_no_spills(g)) {
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* Failed to allocate registers.  Spill a reg, and the caller will
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       * loop back into here to try again.
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       */
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int reg = choose_spill_reg(g);
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (reg == -1) {
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         fail("no register to spill\n");
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      } else {
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         spill_reg(reg);
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ralloc_free(g);
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Get the chosen virtual registers for each node, and map virtual
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * regs in the register classes back down to real hardware reg
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * numbers.
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   prog_data->total_grf = first_assigned_grf;
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < virtual_grf_count; i++) {
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int reg = ra_get_node_reg(g, i);
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      prog_data->total_grf = MAX2(prog_data->total_grf,
230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				  hw_reg_mapping[i] + virtual_grf_sizes[i]);
231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_list(node, &this->instructions) {
234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      vec4_instruction *inst = (vec4_instruction *)node;
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->dst);
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->src[0]);
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->src[1]);
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      assign(hw_reg_mapping, &inst->src[2]);
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ralloc_free(g);
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   float loop_scale = 1.0;
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < this->virtual_grf_count; i++) {
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      spill_costs[i] = 0.0;
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      no_spill[i] = virtual_grf_sizes[i] != 1;
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Calculate costs for spilling nodes.  Call it a cost of 1 per
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * spill/unspill we'll have to do, and guess that the insides of
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * loops run 10 times.
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_list(node, &this->instructions) {
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      vec4_instruction *inst = (vec4_instruction *) node;
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned int i = 0; i < 3; i++) {
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (inst->src[i].file == GRF) {
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    spill_costs[inst->src[i].reg] += loop_scale;
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (inst->src[i].reladdr)
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               no_spill[inst->src[i].reg] = true;
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (inst->dst.file == GRF) {
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 spill_costs[inst->dst.reg] += loop_scale;
274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (inst->dst.reladdr)
275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            no_spill[inst->dst.reg] = true;
276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch (inst->opcode) {
279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case BRW_OPCODE_DO:
281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 loop_scale *= 10;
282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case BRW_OPCODE_WHILE:
285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 loop_scale /= 10;
286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case VS_OPCODE_SCRATCH_READ:
289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case VS_OPCODE_SCRATCH_WRITE:
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         for (int i = 0; i < 3; i++) {
291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            if (inst->src[i].file == GRF)
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               no_spill[inst->src[i].reg] = true;
293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (inst->dst.file == GRF)
295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    no_spill[inst->dst.reg] = true;
296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::choose_spill_reg(struct ra_graph *g)
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   float spill_costs[this->virtual_grf_count];
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool no_spill[this->virtual_grf_count];
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   evaluate_spill_costs(spill_costs, no_spill);
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int i = 0; i < this->virtual_grf_count; i++) {
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!no_spill[i])
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         ra_set_node_spill_cost(g, i, spill_costs[i]);
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return ra_get_best_spill_node(g);
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvec4_visitor::spill_reg(int spill_reg_nr)
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(virtual_grf_sizes[spill_reg_nr] == 1);
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int spill_offset = c->last_scratch++;
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Generate spill/unspill instructions for the objects being spilled. */
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_list(node, &this->instructions) {
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      vec4_instruction *inst = (vec4_instruction *) node;
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned int i = 0; i < 3; i++) {
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            src_reg spill_reg = inst->src[i];
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            inst->src[i].reg = virtual_grf_alloc(1);
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            dst_reg temp = dst_reg(inst->src[i]);
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            /* Only read the necessary channels, to avoid overwriting the rest
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org             * with data that may not have been written to scratch.
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org             */
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            temp.writemask = 0;
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            for (int c = 0; c < 4; c++)
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org               temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c));
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(temp.writemask != 0);
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            emit_scratch_read(inst, temp, spill_reg, spill_offset);
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         }
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         dst_reg spill_reg = inst->dst;
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         inst->dst.reg = virtual_grf_alloc(1);
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         /* We don't want a swizzle when reading from the source; read the
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          * whole register and use spill_reg's writemask to select which
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          * channels to write.
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          */
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         src_reg temp = src_reg(inst->dst);
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         temp.swizzle = BRW_SWIZZLE_XYZW;
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org         emit_scratch_write(inst, temp, spill_reg, spill_offset);
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   this->live_intervals_valid = false;
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} /* namespace brw */
366