1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2010 Intel Corporation 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"), 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions: 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software. 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN THE SOFTWARE. 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Authors: 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Eric Anholt <eric@anholt.net> 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_fs.h" 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/glsl_types.h" 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_optimization.h" 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_print_visitor.h" 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgassign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (reg->file == GRF) { 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(reg->reg_offset >= 0); 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg->reg_offset = 0; 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::assign_regs_trivial() 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int hw_reg_mapping[this->virtual_grf_count + 1]; 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int i; 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg_width = c->dispatch_width / 8; 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Note that compressed instructions require alignment to 2 registers. */ 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 1; i <= this->virtual_grf_count; i++) { 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->virtual_grf_sizes[i - 1] * reg_width); 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->grf_used = hw_reg_mapping[this->virtual_grf_count]; 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->dst, reg_width); 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->src[0], reg_width); 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->src[1], reg_width); 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->src[2], reg_width); 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->grf_used >= max_grf) { 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("Ran out of regs on trivial allocator (%d/%d)\n", 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->grf_used, max_grf); 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbrw_alloc_reg_set_for_classes(struct brw_context *brw, 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int *class_sizes, 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_count, 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg_width, 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int base_reg_count) 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct intel_context *intel = &brw->intel; 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Compute the total number of registers across all classes. */ 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int ra_reg_count = 0; 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < class_count; i++) { 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_reg_count += base_reg_count - (class_sizes[i] - 1); 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(brw->wm.ra_reg_to_grf); 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(brw->wm.regs); 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.regs = ra_alloc_reg_set(brw, ra_reg_count); 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(brw->wm.classes); 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.classes = ralloc_array(brw, int, class_count + 1); 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.aligned_pairs_class = -1; 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Now, add the registers to their classes, and add the conflicts 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * between them and the base GRF registers (and also each other). 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg = 0; 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int pairs_base_reg = 0; 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int pairs_reg_count = 0; 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < class_count; i++) { 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_reg_count = base_reg_count - (class_sizes[i] - 1); 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs); 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Save this off for the aligned pair class at the end. */ 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (class_sizes[i] == 2) { 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pairs_base_reg = reg; 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pairs_reg_count = class_reg_count; 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int j = 0; j < class_reg_count; j++) { 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg); 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.ra_reg_to_grf[reg] = j; 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int base_reg = j; 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_reg < j + class_sizes[i]; 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_reg++) { 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg); 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg++; 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(reg == ra_reg_count); 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Add a special class for aligned pairs, which we'll put delta_x/y 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * in on gen5 so that we can do PLN. 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (brw->has_pln && reg_width == 1 && intel->gen < 6) { 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs); 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < pairs_reg_count; i++) { 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class, 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pairs_base_reg + i); 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org class_count++; 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_set_finalize(brw->wm.regs); 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::assign_regs() 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Most of this allocation was written for a reg_width of 1 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * (dispatch_width == 8). In extending to 16-wide, the code was 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * left in place and it was converted to have the hardware 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers it's allocating be contiguous physical pairs of regs 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * for reg_width == 2. 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg_width = c->dispatch_width / 8; 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int hw_reg_mapping[this->virtual_grf_count]; 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int base_reg_count = (max_grf - first_assigned_grf) / reg_width; 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_sizes[base_reg_count]; 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int class_count = 0; 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org calculate_live_intervals(); 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Set up the register classes. 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The base registers store a scalar value. For texture samples, 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * we get virtual GRFs composed of 4 contiguous hw register. For 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * structures and arrays, we store them as contiguous larger things 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * than that, though we should be able to do better most of the 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * time. 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org class_sizes[class_count++] = 1; 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (brw->has_pln && intel->gen < 6) { 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Always set up the (unaligned) pairs for gen5, so we can find 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * them for making the aligned pair class. 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org class_sizes[class_count++] = 2; 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int r = 0; r < this->virtual_grf_count; r++) { 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int i; 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < class_count; i++) { 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (class_sizes[i] == this->virtual_grf_sizes[r]) 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i == class_count) { 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->virtual_grf_sizes[r] >= base_reg_count) { 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("Object too large to register allocate.\n"); 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org class_sizes[class_count++] = this->virtual_grf_sizes[r]; 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg_width, base_reg_count); 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->virtual_grf_count); 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < this->virtual_grf_count; i++) { 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int c = 0; c < class_count; c++) { 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (class_sizes[c] == this->virtual_grf_sizes[i]) { 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Special case: on pre-GEN6 hardware that supports PLN, the 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * second operand of a PLN instruction needs to be an 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * even-numbered register, so we have a special register class 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * wm_aligned_pairs_class to handle this case. pre-GEN6 always 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * second operand of a PLN instruction (since it doesn't support 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * any other interpolation modes). So all we need to do is find 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * that register and set it to the appropriate class. 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (brw->wm.aligned_pairs_class >= 0 && 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_set_node_class(g, i, brw->wm.aligned_pairs_class); 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_set_node_class(g, i, brw->wm.classes[c]); 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int j = 0; j < i; j++) { 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (virtual_grf_interferes(i, j)) { 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_add_node_interference(g, i, j); 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ra_allocate_no_spills(g)) { 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Failed to allocate registers. Spill a reg, and the caller will 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * loop back into here to try again. 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg = choose_spill_reg(g); 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (reg == -1) { 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("no register to spill\n"); 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (c->dispatch_width == 16) { 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("Failure to register allocate. Reduce number of live scalar " 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "values to avoid this."); 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_reg(reg); 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(g); 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Get the chosen virtual registers for each node, and map virtual 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * regs in the register classes back down to real hardware reg 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * numbers. 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->grf_used = first_assigned_grf; 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < this->virtual_grf_count; i++) { 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg = ra_get_node_reg(g, i); 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[i] = (first_assigned_grf + 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.ra_reg_to_grf[reg] * reg_width); 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->grf_used = MAX2(this->grf_used, 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hw_reg_mapping[i] + this->virtual_grf_sizes[i] * 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg_width); 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->dst, reg_width); 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->src[0], reg_width); 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->src[1], reg_width); 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_reg(hw_reg_mapping, &inst->src[2], reg_width); 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_free(g); 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset) 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst); 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unspill_inst->offset = spill_offset; 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unspill_inst->ir = inst->ir; 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unspill_inst->annotation = inst->annotation; 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Choose a MRF that won't conflict with an MRF that's live across the 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * spill. Nothing else will make it up to MRF 14/15. 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unspill_inst->base_mrf = 14; 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unspill_inst->mlen = 1; /* header contains offset */ 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->insert_before(unspill_inst); 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::choose_spill_reg(struct ra_graph *g) 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float loop_scale = 1.0; 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float spill_costs[this->virtual_grf_count]; 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool no_spill[this->virtual_grf_count]; 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < this->virtual_grf_count; i++) { 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_costs[i] = 0.0; 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[i] = false; 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Calculate costs for spilling nodes. Call it a cost of 1 per 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * spill/unspill we'll have to do, and guess that the insides of 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * loops run 10 times. 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < 3; i++) { 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == GRF) { 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_costs[inst->src[i].reg] += loop_scale; 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Register spilling logic assumes full-width registers; smeared 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers have a width of 1 so if we try to spill them we'll 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * generate invalid assembly. This shouldn't be a problem because 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * smeared registers are only used as short-term temporaries when 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * loading pull constants, so spilling them is unlikely to reduce 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register pressure anyhow. 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].smear >= 0) { 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->src[i].reg] = true; 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF) { 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale; 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.smear >= 0) { 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->dst.reg] = true; 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (inst->opcode) { 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_DO: 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop_scale *= 10; 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_WHILE: 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop_scale /= 10; 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FS_OPCODE_SPILL: 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[0].file == GRF) 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->src[0].reg] = true; 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FS_OPCODE_UNSPILL: 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF) 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org no_spill[inst->dst.reg] = true; 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < this->virtual_grf_count; i++) { 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!no_spill[i]) 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ra_set_node_spill_cost(g, i, spill_costs[i]); 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return ra_get_best_spill_node(g); 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::spill_reg(int spill_reg) 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int size = virtual_grf_sizes[spill_reg]; 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int spill_offset = c->last_scratch; 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */ 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->last_scratch += size * REG_SIZE; 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Generate spill/unspill instructions for the objects being 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * spilled. Right now, we spill or unspill the whole thing to a 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * virtual grf of the same size. For most instructions, though, we 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * could just spill/unspill the GRF being accessed. 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < 3; i++) { 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == GRF && 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg == spill_reg) { 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg = virtual_grf_alloc(1); 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_unspill(inst, inst->src[i], 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_offset + REG_SIZE * inst->src[i].reg_offset); 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF && 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg == spill_reg) { 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int subset_spill_offset = (spill_offset + 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org REG_SIZE * inst->dst.reg_offset); 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg = virtual_grf_alloc(inst->regs_written()); 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg_offset = 0; 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If our write is going to affect just part of the 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * inst->regs_written(), then we need to unspill the destination 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * since we write back out all of the regs_written(). 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->predicated || inst->force_uncompressed || inst->force_sechalf) { 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg unspill_reg = inst->dst; 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int chan = 0; chan < inst->regs_written(); chan++) { 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_unspill(inst, unspill_reg, 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org subset_spill_offset + REG_SIZE * chan); 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unspill_reg.reg_offset++; 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg spill_src = inst->dst; 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_src.reg_offset = 0; 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_src.abs = false; 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_src.negate = false; 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_src.smear = -1; 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int chan = 0; chan < inst->regs_written(); chan++) { 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL, 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg_null_f, spill_src); 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_src.reg_offset++; 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_inst->offset = subset_spill_offset + chan * REG_SIZE; 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_inst->ir = inst->ir; 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_inst->annotation = inst->annotation; 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_inst->base_mrf = 14; 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_inst->mlen = 2; /* header, value */ 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->insert_after(spill_inst); 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->live_intervals_valid = false; 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 442