1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2010 Intel Corporation 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"), 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions: 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software. 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN THE SOFTWARE. 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** @file brw_fs.cpp 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This file drives the GLSL IR -> LIR translation, contains the 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * optimizations on the LIR, and drives the generation of native code 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * from the LIR. 
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgextern "C" { 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <sys/types.h> 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/macros.h" 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/shaderobj.h" 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/uniforms.h" 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/fbobject.h" 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "program/prog_parameter.h" 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "program/prog_print.h" 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "program/register_allocate.h" 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "program/sampler.h" 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "program/hash_table.h" 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_context.h" 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_eu.h" 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_wm.h" 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_shader.h" 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_fs.h" 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/glsl_types.h" 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_print_visitor.h" 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::init() 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(this, 0, sizeof(*this)); 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->opcode = BRW_OPCODE_NOP; 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->conditional_mod = BRW_CONDITIONAL_NONE; 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->dst = reg_undef; 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[0] = reg_undef; 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[1] = reg_undef; 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[2] = reg_undef; 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::fs_inst() 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::fs_inst(enum opcode opcode) 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->opcode = opcode; 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::fs_inst(enum opcode opcode, fs_reg dst) 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->opcode = opcode; 
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->dst = dst; 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dst.file == GRF) 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(dst.reg_offset >= 0); 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->opcode = opcode; 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->dst = dst; 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[0] = src0; 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dst.file == GRF) 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(dst.reg_offset >= 0); 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src[0].file == GRF) 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(src[0].reg_offset >= 0); 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->opcode = opcode; 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->dst = dst; 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[0] = src0; 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
this->src[1] = src1; 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dst.file == GRF) 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(dst.reg_offset >= 0); 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src[0].file == GRF) 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(src[0].reg_offset >= 0); 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src[1].file == GRF) 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(src[1].reg_offset >= 0); 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::fs_inst(enum opcode opcode, fs_reg dst, 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg src0, fs_reg src1, fs_reg src2) 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->opcode = opcode; 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->dst = dst; 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[0] = src0; 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[1] = src1; 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->src[2] = src2; 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dst.file == GRF) 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(dst.reg_offset >= 0); 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src[0].file == GRF) 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(src[0].reg_offset >= 0); 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src[1].file == GRF) 
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(src[1].reg_offset >= 0); 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src[2].file == GRF) 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(src[2].reg_offset >= 0); 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::equals(fs_inst *inst) 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (opcode == inst->opcode && 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst.equals(inst->dst) && 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src[0].equals(inst->src[0]) && 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src[1].equals(inst->src[1]) && 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src[2].equals(inst->src[2]) && 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org saturate == inst->saturate && 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org predicated == inst->predicated && 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org conditional_mod == inst->conditional_mod && 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mlen == inst->mlen && 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_mrf == inst->base_mrf && 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sampler == inst->sampler && 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org target == inst->target && 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org eot == inst->eot && 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org header_present == inst->header_present && 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shadow_compare == inst->shadow_compare && 
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset == inst->offset); 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::regs_written() 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_tex()) 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 4; 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2, 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * but we don't currently use them...nor do we have an opcode for them. 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::overwrites_reg(const fs_reg ®) 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (reg.file == dst.file && 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg.reg == dst.reg && 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg.reg_offset >= dst.reg_offset && 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg.reg_offset < dst.reg_offset + regs_written()); 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::is_tex() 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (opcode == SHADER_OPCODE_TEX || 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == FS_OPCODE_TXB || 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_TXD || 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_TXF || 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_TXL || 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_TXS); 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst::is_math() 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (opcode == SHADER_OPCODE_RCP || 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_RSQ || 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_SQRT || 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_EXP2 || 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_LOG2 || 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_SIN || 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_COS || 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_INT_QUOTIENT || 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_INT_REMAINDER || 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opcode == SHADER_OPCODE_POW); 
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::init() 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(this, 0, sizeof(*this)); 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->smear = -1; 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Generic unset register constructor. */ 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg() 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = BAD_FILE; 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Immediate value constructor. */ 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg(float f) 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = IMM; 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->type = BRW_REGISTER_TYPE_F; 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->imm.f = f; 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Immediate value constructor. 
*/ 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg(int32_t i) 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = IMM; 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->type = BRW_REGISTER_TYPE_D; 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->imm.i = i; 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Immediate value constructor. */ 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg(uint32_t u) 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = IMM; 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->type = BRW_REGISTER_TYPE_UD; 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->imm.u = u; 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Fixed brw_reg Immediate value constructor. 
*/ 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg(struct brw_reg fixed_hw_reg) 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = FIXED_HW_REG; 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->fixed_hw_reg = fixed_hw_reg; 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->type = fixed_hw_reg.type; 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::equals(const fs_reg &r) const 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (file == r.file && 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg == r.reg && 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg_offset == r.reg_offset && 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type == r.type && 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org negate == r.negate && 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org abs == r.abs && 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memcmp(&fixed_hw_reg, &r.fixed_hw_reg, 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeof(fixed_hw_reg)) == 0 && 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org smear == r.smear && 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.u == r.imm.u); 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::type_size(const struct glsl_type *type) 
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int size, i; 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (type->base_type) { 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_UINT: 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_INT: 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_FLOAT: 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_BOOL: 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return type->components(); 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_ARRAY: 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return type_size(type->fields.array) * type->length; 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_STRUCT: 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = 0; 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < type->length; i++) { 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size += type_size(type->fields.structure[i].type); 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return size; 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_SAMPLER: 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Samplers take up no register space, since they're baked in at 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * link time. 
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"not reached"); 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::fail(const char *format, ...) 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org va_list va; 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org char *msg; 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (failed) 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org failed = true; 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org va_start(va, format); 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org msg = ralloc_vasprintf(mem_ctx, format, va); 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org va_end(va); 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg); 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->fail_msg = msg; 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (INTEL_DEBUG & DEBUG_WM) { 
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fprintf(stderr, "%s", msg); 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit(enum opcode opcode) 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return emit(fs_inst(opcode)); 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit(enum opcode opcode, fs_reg dst) 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return emit(fs_inst(opcode, dst)); 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit(enum opcode opcode, fs_reg dst, fs_reg src0) 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return emit(fs_inst(opcode, dst, src0)); 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return emit(fs_inst(opcode, dst, src0, 
src1)); 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit(enum opcode opcode, fs_reg dst, 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg src0, fs_reg src1, fs_reg src2) 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return emit(fs_inst(opcode, dst, src0, src1, src2)); 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::push_force_uncompressed() 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org force_uncompressed_stack++; 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::pop_force_uncompressed() 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org force_uncompressed_stack--; 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(force_uncompressed_stack >= 0); 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::push_force_sechalf() 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org force_sechalf_stack++; 
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::pop_force_sechalf() 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org force_sechalf_stack--; 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(force_sechalf_stack >= 0); 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Returns how many MRFs an FS opcode will write over. 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Note that this is not the 0 or 1 implied writes in an actual gen 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instruction -- the FS opcodes often generate MOVs in addition. 
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::implied_mrf_writes(fs_inst *inst) 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->mlen == 0) 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (inst->opcode) { 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_RCP: 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_RSQ: 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_SQRT: 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_EXP2: 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_LOG2: 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_SIN: 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_COS: 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1 * c->dispatch_width / 8; 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_POW: 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_INT_QUOTIENT: 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_INT_REMAINDER: 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 2 * c->dispatch_width / 8; 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_TEX: 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FS_OPCODE_TXB: 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_TXD: 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_TXF: 
409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_TXL: 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_TXS: 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FS_OPCODE_FB_WRITE: 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 2; 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FS_OPCODE_PULL_CONSTANT_LOAD: 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FS_OPCODE_UNSPILL: 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FS_OPCODE_SPILL: 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 2; 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"not reached"); 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return inst->mlen; 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::virtual_grf_alloc(int size) 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (virtual_grf_array_size <= virtual_grf_count) { 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (virtual_grf_array_size == 0) 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_array_size = 16; 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_array_size *= 2; 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, 
int, 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_array_size); 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_sizes[virtual_grf_count] = size; 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return virtual_grf_count++; 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Fixed HW reg constructor. */ 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg(enum register_file file, int reg) 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = file; 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->reg = reg; 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->type = BRW_REGISTER_TYPE_F; 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Fixed HW reg constructor. */ 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg(enum register_file file, int reg, uint32_t type) 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = file; 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->reg = reg; 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->type = type; 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** Automatic reg constructor. 
*/ 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org init(); 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->file = GRF; 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->reg = v->virtual_grf_alloc(v->type_size(type)); 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->reg_offset = 0; 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->type = brw_type_for_base_type(type); 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg * 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::variable_storage(ir_variable *var) 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return (fs_reg *)hash_table_find(this->variable_ht, var); 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgimport_uniforms_callback(const void *key, 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void *data, 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void *closure) 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct hash_table *dst_ht = (struct hash_table *)closure; 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const fs_reg *reg = (const fs_reg *)data; 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (reg->file != UNIFORM) 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hash_table_insert(dst_ht, data, key); 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* For 16-wide, we need to follow from the uniform setup of 8-wide dispatch. 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This brings in those uniform definitions 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::import_uniforms(fs_visitor *v) 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hash_table_call_foreach(v->variable_ht, 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org import_uniforms_callback, 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org variable_ht); 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->params_remap = v->params_remap; 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* Our support for uniforms is piggy-backed on the struct 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * gl_fragment_program, because that's where the values actually 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * get stored, rather than in some global gl_shader_program uniform 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * store. 
505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::setup_uniform_values(int loc, const glsl_type *type) 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int offset = 0; 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type->is_matrix()) { 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type->vector_elements, 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1); 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < type->matrix_columns; i++) { 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset += setup_uniform_values(loc + offset, column); 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return offset; 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (type->base_type) { 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_FLOAT: 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_UINT: 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_INT: 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_BOOL: 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < type->vector_elements; i++) { 
529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int param = c->prog_data.nr_params++; 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(param < ARRAY_SIZE(c->prog_data.param)); 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->param_index[param] = loc; 534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->param_offset[param] = i; 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_STRUCT: 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < type->length; i++) { 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset += setup_uniform_values(loc + offset, 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type->fields.structure[i].type); 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return offset; 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_ARRAY: 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < type->length; i++) { 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset += setup_uniform_values(loc + offset, type->fields.array); 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return offset; 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case GLSL_TYPE_SAMPLER: 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The sampler takes up a slot, but we 
don't use any values from it. */ 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1; 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"not reached"); 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 0; 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* Our support for builtin uniforms is even scarier than non-builtin. 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * It sits on top of the PROG_STATE_VAR parameters that are 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * automatically updated from GL context state. 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::setup_builtin_uniform_values(ir_variable *ir) 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const ir_state_slot *const slots = ir->state_slots; 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(ir->state_slots != NULL); 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < ir->num_state_slots; i++) { 573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* This state reference has already been setup by ir_to_mesa, but we'll 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * get the same index back here. 
575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int index = _mesa_add_state_reference(this->fp->Base.Parameters, 577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (gl_state_index *)slots[i].tokens); 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Add each of the unique swizzles of the element as a parameter. 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This'll end up matching the expected layout of the 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * array/matrix/structure we're trying to fill in. 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int last_swiz = -1; 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int j = 0; j < 4; j++) { 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int swiz = GET_SWZ(slots[i].swizzle, j); 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (swiz == last_swiz) 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_swiz = swiz; 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->param_index[c->prog_data.nr_params] = index; 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->param_offset[c->prog_data.nr_params] = swiz; 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.nr_params++; 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg * 
598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg wpos = *reg; 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo; 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* gl_FragCoord.x */ 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ir->pixel_center_integer) { 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, wpos, this->pixel_x); 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)); 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org wpos.reg_offset++; 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* gl_FragCoord.y */ 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!flip && ir->pixel_center_integer) { 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, wpos, this->pixel_y); 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg pixel_y = this->pixel_y; 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float offset = (ir->pixel_center_integer ? 
0.0 : 0.5); 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (flip) { 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pixel_y.negate = true; 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset += c->key.drawable_height - 1.0; 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)); 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org wpos.reg_offset++; 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* gl_FragCoord.z */ 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen >= 6) { 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, wpos, 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(FS_OPCODE_LINTERP, wpos, 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interp_reg(FRAG_ATTRIB_WPOS, 2)); 637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org wpos.reg_offset++; 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* gl_FragCoord.w: Already set up in emit_interpolation */ 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
emit(BRW_OPCODE_MOV, wpos, this->wpos_w); 642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return reg; 644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp, 648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org glsl_interp_qualifier interpolation_mode, 649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool is_centroid) 650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw_wm_barycentric_interp_mode barycoord_mode; 652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (is_centroid) { 653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (interpolation_mode == INTERP_QUALIFIER_SMOOTH) 654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org barycoord_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC; 655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org barycoord_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC; 657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (interpolation_mode == INTERP_QUALIFIER_SMOOTH) 659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; 660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC; 662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return emit(FS_OPCODE_LINTERP, attr, 
664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->delta_x[barycoord_mode], 665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->delta_y[barycoord_mode], interp); 666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg * 669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit_general_interpolation(ir_variable *ir) 670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg->type = brw_type_for_base_type(ir->type->get_scalar_type()); 673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg attr = *reg; 674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int array_elements; 676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const glsl_type *type; 677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ir->type->is_array()) { 679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org array_elements = ir->type->length; 680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (array_elements == 0) { 681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("dereferenced array '%s' has length 0\n", ir->name); 682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type = ir->type->fields.array; 684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org array_elements = 1; 686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type = ir->type; 687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org glsl_interp_qualifier interpolation_mode = 690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ir->determine_interpolation_mode(c->key.flat_shade); 691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int location = ir->location; 693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < array_elements; i++) { 694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int j = 0; j < type->matrix_columns; j++) { 695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (urb_setup[location] == -1) { 696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If there's no incoming setup data for this slot, don't 697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * emit interpolation for it. 698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org attr.reg_offset += type->vector_elements; 700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org location++; 701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (interpolation_mode == INTERP_QUALIFIER_FLAT) { 705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Constant interpolation (flat shading) case. The SF has 706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * handed us defined values in only the constant offset 707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * field of the setup reg. 
708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int k = 0; k < type->vector_elements; k++) { 710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_reg interp = interp_reg(location, k); 711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interp = suboffset(interp, 3); 712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interp.type = reg->type; 713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); 714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org attr.reg_offset++; 715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Smooth/noperspective interpolation case. */ 718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int k = 0; k < type->vector_elements; k++) { 719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* FINISHME: At some point we probably want to push 720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * this farther by giving similar treatment to the 721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * other potentially constant components of the 722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * attribute, as well as making brw_vs_constval.c 723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * handle varyings other than gl_TexCoord. 
724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (location >= FRAG_ATTRIB_TEX0 && 726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org location <= FRAG_ATTRIB_TEX7 && 727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org k == 3 && !(c->key.proj_attrib_mask & (1 << location))) { 728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); 729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_reg interp = interp_reg(location, k); 731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_linterp(attr, fs_reg(interp), interpolation_mode, 732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ir->centroid); 733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (brw->needs_unlit_centroid_workaround && ir->centroid) { 734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Get the pixel/sample mask into f0 so that we know 735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * which pixels are lit. Then, for each channel that is 736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * unlit, replace the centroid data with non-centroid 737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * data. 
738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS, attr); 740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = emit_linterp(attr, fs_reg(interp), 741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interpolation_mode, false); 742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->predicated = true; 743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->predicate_inverse = true; 744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen < 6) { 746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); 747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org attr.reg_offset++; 750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org location++; 754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return reg; 758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_reg * 761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 
764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The frontfacing comes in as a bit in the thread payload. */ 766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen >= 6) { 767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_ASR, *reg, 768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg(15)); 770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_NOT, *reg, *reg); 771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1)); 772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * us front face 776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = emit(BRW_OPCODE_CMP, *reg, 778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg(r1_6ud), 779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg(1u << 31)); 780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->conditional_mod = BRW_CONDITIONAL_L; 781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)); 782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return reg; 785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) 789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (opcode) { 791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_RCP: 792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_RSQ: 793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_SQRT: 794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_EXP2: 795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_LOG2: 796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_SIN: 797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_COS: 798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"not reached: bad math opcode"); 801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Can't do hstride == 0 args to gen6 math, so expand it out. We 805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * might be able to do better by doing execsize = 1 math and then 806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * expanding that result out, but we would need to be careful with 807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * masking. 
808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Gen 6 hardware ignores source modifiers (negate and abs) on math 810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instructions, so we also move to a temp to set those up. 811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen == 6 && (src.file == UNIFORM || 813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src.abs || 814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src.negate)) { 815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg expanded = fs_reg(this, glsl_type::float_type); 816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, expanded, src); 817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src = expanded; 818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = emit(opcode, dst, src); 821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen < 6) { 823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->base_mrf = 2; 824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->mlen = c->dispatch_width / 8; 825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return inst; 828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 
832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int base_mrf = 2; 834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst; 835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (opcode) { 837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_POW: 838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_INT_QUOTIENT: 839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case SHADER_OPCODE_INT_REMAINDER: 840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"not reached: unsupported binary math opcode."); 843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen >= 7) { 847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = emit(opcode, dst, src0, src1); 848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (intel->gen == 6) { 849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Can't do hstride == 0 args to gen6 math, so expand it out. 850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The hardware ignores source modifiers (negate and abs) on math 852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instructions, so we also move to a temp to set those up. 
853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src0.file == UNIFORM || src0.abs || src0.negate) { 855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg expanded = fs_reg(this, glsl_type::float_type); 856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org expanded.type = src0.type; 857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, expanded, src0); 858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src0 = expanded; 859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src1.file == UNIFORM || src1.abs || src1.negate) { 862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg expanded = fs_reg(this, glsl_type::float_type); 863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org expanded.type = src1.type; 864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, expanded, src1); 865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src1 = expanded; 866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = emit(opcode, dst, src0, src1); 869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Message Payload": 872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Operand0[7]. For the INT DIV functions, this operand is the 874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * denominator." 875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ... 
876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Operand1[7]. For the INT DIV functions, this operand is the 877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * numerator." 878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool is_int_div = opcode != SHADER_OPCODE_POW; 880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg &op0 = is_int_div ? src1 : src0; 881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg &op1 = is_int_div ? src0 : src1; 882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1, op1.type), op1); 884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst = emit(opcode, dst, op0, reg_null_f); 885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->base_mrf = base_mrf; 887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->mlen = 2 * c->dispatch_width / 8; 888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return inst; 890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * To be called after the last _mesa_add_state_reference() call, to 894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * set up prog_data.param[] for assign_curb_setup() and 895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * setup_pull_constants(). 
896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::setup_paramvalues_refs() 899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->dispatch_width != 8) 901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Set up the pointers to ParamValues now that that array is finalized. */ 904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { 905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.param[i] = 906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] + 907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->param_offset[i]; 908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::assign_curb_setup() 913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->dispatch_width == 8) { 916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.first_curbe_grf = c->nr_payload_regs; 917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.first_curbe_grf_16 = c->nr_payload_regs; 
919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Map the offsets in the UNIFORM file to fixed HW regs. */ 922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < 3; i++) { 926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == UNIFORM) { 927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; 928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + 929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org constant_nr / 8, 930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org constant_nr % 8); 931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].file = FIXED_HW_REG; 933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); 934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::calculate_urb_setup() 941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; 
i++) { 943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org urb_setup[i] = -1; 944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int urb_next = 0; 947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Figure out where each of the incoming setup attributes lands. */ 948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen >= 6) { 949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fp->Base.InputsRead & BITFIELD64_BIT(i)) { 951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org urb_setup[i] = urb_next++; 952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* FINISHME: The sf doesn't map VS->FS inputs for us very well. 
*/ 956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Point size is packed into the header, not as a general attribute */ 958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i == VERT_RESULT_PSIZ) 959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int fp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i); 963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The back color slot is skipped when the front color is 965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * also written to. In addition, some slots can be 966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * written in the vertex shader and not read in the 967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * fragment shader. So the register number must always be 968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * incremented, mapped or not. 
969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fp_index >= 0) 971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org urb_setup[fp_index] = urb_next; 972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org urb_next++; 973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * It's a FS only attribute, and we did interpolation for this attribute 978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * in SF thread. So, count it here, too. 979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * See compile_sf_prog() for more info. 981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fp->Base.InputsRead & BITFIELD64_BIT(FRAG_ATTRIB_PNTC)) 983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org urb_setup[FRAG_ATTRIB_PNTC] = urb_next++; 984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Each attribute is 4 setup channels, each of which is half a reg. 
*/ 987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.urb_read_length = urb_next * 2; 988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::assign_urb_setup() 992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int urb_start = c->nr_payload_regs + c->prog_data.curb_read_length; 994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Offset all the urb_setup[] index by the actual position of the 996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * setup regs, now that the location of the constants has been chosen. 997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->opcode == FS_OPCODE_LINTERP) { 1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(inst->src[2].file == FIXED_HW_REG); 1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[2].fixed_hw_reg.nr += urb_start; 1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->opcode == FS_OPCODE_CINTERP) { 1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(inst->src[0].file == FIXED_HW_REG); 1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].fixed_hw_reg.nr += urb_start; 
1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Split large virtual GRFs into separate components if we can. 1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This is mostly duplicated with what brw_fs_vector_splitting does, 1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * but that's really conservative because it's afraid of doing 1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * splitting that doesn't result in real progress after the rest of 1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the optimization phases, which would cause infinite looping in 1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * optimization. We can do it once here, safely. This also has the 1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * opportunity to split interpolated values, or maybe even uniforms, 1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * which we don't have at the IR level. 
1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * We want to split, because virtual GRFs are what we register 1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * allocate and spill (due to contiguousness requirements for some 1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instructions), and they're what we naturally generate in the 1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * codegen process, but most virtual GRFs don't actually need to be 1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * contiguous sets of GRFs. If we split, we'll end up with reduced 1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * live intervals and better dead code elimination and coalescing. 1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::split_virtual_grfs() 1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int num_vars = this->virtual_grf_count; 1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool split_grf[num_vars]; 1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int new_virtual_grf[num_vars]; 1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Try to split anything > 0 sized. 
*/ 1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < num_vars; i++) { 1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->virtual_grf_sizes[i] != 1) 1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org split_grf[i] = true; 1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org split_grf[i] = false; 1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (brw->has_pln && 1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF) { 1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* PLN opcodes rely on the delta_xy being contiguous. We only have to 1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * check this for BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC, because prior to 1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Gen6, that was the only supported interpolation mode, and since Gen6, 1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * delta_x and delta_y are in fixed hardware registers. 
1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org split_grf[this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg] = 1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org false; 1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If there's a SEND message that requires contiguous destination 1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers, no splitting is allowed. 1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->regs_written() > 1) { 1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org split_grf[inst->dst.reg] = false; 1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Allocate new space for split regs. Note that the virtual 1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * numbers will be contiguous. 
1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < num_vars; i++) { 1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (split_grf[i]) { 1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org new_virtual_grf[i] = virtual_grf_alloc(1); 1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { 1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg = virtual_grf_alloc(1); 1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(reg == new_virtual_grf[i] + j - 1); 1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (void) reg; 1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->virtual_grf_sizes[i] = 1; 1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF && 1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org split_grf[inst->dst.reg] && 1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg_offset != 0) { 1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg = (new_virtual_grf[inst->dst.reg] + 1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg_offset - 1); 1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.reg_offset = 0; 1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file == GRF && 1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org split_grf[inst->src[i].reg] && 1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg_offset != 0) { 1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + 1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg_offset - 1); 1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg_offset = 0; 1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->live_intervals_valid = false; 1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::remove_dead_constants() 1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->dispatch_width == 8) { 1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params); 1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < c->prog_data.nr_params; i++) 1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->params_remap[i] = -1; 1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Find which params are still in use. 
*/ 1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; 1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file != UNIFORM) 1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(constant_nr < (int)c->prog_data.nr_params); 1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* For now, set this to non-negative. We'll give it the 1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * actual new number in a moment, in order to keep the 1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register numbers nicely ordered. 1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->params_remap[constant_nr] = 0; 1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Figure out what the new numbers for the params will be. 
At some 1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * point when we're doing uniform array access, we're going to want 1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to keep the distinction between .reg and .reg_offset, but for 1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * now we don't care. 1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int new_nr_params = 0; 1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { 1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->params_remap[i] != -1) { 1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->params_remap[i] = new_nr_params++; 1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Update the list of params to be uploaded to match our new numbering. */ 1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { 1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int remapped = this->params_remap[i]; 1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (remapped == -1) 1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We've already done setup_paramvalues_refs() so no need to worry 1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * about param_index and param_offset. 
1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.param[remapped] = c->prog_data.param[i]; 1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.nr_params = new_nr_params; 1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* This should have been generated in the 8-wide pass already. */ 1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(this->params_remap); 1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Now do the renumbering of the shader to remove unused params. */ 1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; 1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file != UNIFORM) 1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(this->params_remap[constant_nr] != -1); 1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg = this->params_remap[constant_nr]; 1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
inst->src[i].reg_offset = 0; 1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Choose accesses from the UNIFORM file to demote to using the pull 1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * constant buffer. 1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * We allow a fragment shader to have more than the specified minimum 1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * maximum number of fragment shader uniform components (64). If 1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * there are too many of these, they'd fill up all of register space. 1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * So, this will push some of them out to the pull constant buffer and 1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * update the program to load them. 1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::setup_pull_constants() 1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Only allow 16 registers (128 uniform components) as push constants. 
*/ 1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int max_uniform_components = 16 * 8; 1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->prog_data.nr_params <= max_uniform_components) 1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->dispatch_width == 16) { 1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fail("Pull constants not supported in 16-wide\n"); 1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Just demote the end of the list. We could probably do better 1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * here, demoting things that are rarely used in the program first. 
1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int pull_uniform_base = max_uniform_components; 1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; 1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &this->instructions) { 1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->src[i].file != UNIFORM) 1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; 1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (uniform_nr < pull_uniform_base) 1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg dst = fs_reg(this, glsl_type::float_type); 1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER); 1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg offset = fs_reg((unsigned)(((uniform_nr - 1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pull_uniform_base) * 4) & ~15)); 1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD, 1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst, index, offset); 
1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pull->ir = inst->ir; 1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pull->annotation = inst->annotation; 1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pull->base_mrf = 14; 1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pull->mlen = 1; 1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->insert_before(pull); 1239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].file = GRF; 1241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg = dst.reg; 1242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].reg_offset = 0; 1243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3; 1244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < pull_uniform_count; i++) { 1248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i]; 1249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.nr_params -= pull_uniform_count; 1251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.nr_pull_params = pull_uniform_count; 1252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 1255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Attempts to move immediate constants into the immediate 
* constant slot of following instructions.
 *
 * Immediate constants are a bit tricky -- they have to be in the last
 * operand slot, you can't do abs/negate on them, and only the opcodes
 * handled in the switch below are rewritten to take one.
 *
 * For every unpredicated, non-saturating MOV of an IMM into a GRF of
 * the same type, the following instructions are scanned and reads of
 * that GRF are replaced by the immediate where the opcode allows it.
 *
 * Returns true if any instruction was changed; in that case the live
 * intervals are flagged as invalid.
 */

bool
fs_visitor::propagate_constants()
{
   bool progress = false;

   /* NOTE(review): nothing below reads the live interval data directly;
    * confirm whether this call is still required here.
    */
   calculate_live_intervals();

   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      /* Only consider plain "MOV grf, imm" with matching types.  In
       * 16-wide dispatch, MOVs flagged force_uncompressed or
       * force_sechalf are skipped as well.
       */
      if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->dst.file != GRF || inst->src[0].file != IMM ||
          inst->dst.type != inst->src[0].type ||
          (c->dispatch_width == 16 &&
           (inst->force_uncompressed || inst->force_sechalf)))
         continue;

      /* Don't bother with cases where we should have had the
       * operation on the constant folded in GLSL already.
       */
      if (inst->saturate)
         continue;

      /* Found a move of a constant to a GRF.  Find anything else using the GRF
       * before it's written, and replace it with the constant if we can.
       */
      for (fs_inst *scan_inst = (fs_inst *)inst->next;
           !scan_inst->is_tail_sentinel();
           scan_inst = (fs_inst *)scan_inst->next) {
         /* Stop at loop boundaries and at the end of an if/else arm;
          * the scan never continues past these control flow opcodes.
          */
         if (scan_inst->opcode == BRW_OPCODE_DO ||
             scan_inst->opcode == BRW_OPCODE_WHILE ||
             scan_inst->opcode == BRW_OPCODE_ELSE ||
             scan_inst->opcode == BRW_OPCODE_ENDIF) {
            break;
         }

         /* Operands are visited last-to-first, so a match in slot 1 is
          * replaced before slot 0 is looked at; the slot 0 paths below
          * check that src[1] has not already become an immediate.
          */
         for (int i = 2; i >= 0; i--) {
            if (scan_inst->src[i].file != GRF ||
                scan_inst->src[i].reg != inst->dst.reg ||
                scan_inst->src[i].reg_offset != inst->dst.reg_offset)
               continue;

            /* Don't bother with cases where we should have had the
             * operation on the constant folded in GLSL already.
             */
            if (scan_inst->src[i].negate || scan_inst->src[i].abs)
               continue;

            switch (scan_inst->opcode) {
            case BRW_OPCODE_MOV:
               scan_inst->src[i] = inst->src[0];
               progress = true;
               break;

            case BRW_OPCODE_MUL:
            case BRW_OPCODE_ADD:
               if (i == 1) {
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               } else if (i == 0 && scan_inst->src[1].file != IMM) {
                  /* Fit this constant in by commuting the operands.
                   * Exception: we can't do this for 32-bit integer MUL
                   * because it's asymmetric.
                   */
                  if (scan_inst->opcode == BRW_OPCODE_MUL &&
                      (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
                       scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
                     break;
                  scan_inst->src[0] = scan_inst->src[1];
                  scan_inst->src[1] = inst->src[0];
                  progress = true;
               }
               break;

            case BRW_OPCODE_CMP:
            case BRW_OPCODE_IF:
               if (i == 1) {
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               } else if (i == 0 && scan_inst->src[1].file != IMM) {
                  uint32_t new_cmod;

                  /* Only swap when brw_swap_cmod() yields something
                   * other than ~0u (presumably its "no swapped form"
                   * marker -- confirm against its definition).
                   */
                  new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
                  if (new_cmod != ~0u) {
                     /* Fit this constant in by swapping the operands and
                      * flipping the test
                      */
                     scan_inst->src[0] = scan_inst->src[1];
                     scan_inst->src[1] = inst->src[0];
                     scan_inst->conditional_mod = new_cmod;
                     progress = true;
                  }
               }
               break;

            case BRW_OPCODE_SEL:
               if (i == 1) {
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               } else if (i == 0 && scan_inst->src[1].file != IMM) {
                  scan_inst->src[0] = scan_inst->src[1];
                  scan_inst->src[1] = inst->src[0];

                  /* If this was predicated, flipping operands means
                   * we also need to flip the predicate.
                   */
                  if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
                     scan_inst->predicate_inverse =
                        !scan_inst->predicate_inverse;
                  }
                  progress = true;
               }
               break;

            case SHADER_OPCODE_RCP:
               /* The hardware doesn't do math on immediate values
                * (because why are you doing that, seriously?), but
                * the correct answer is to just constant fold it
                * anyway.
                */
               assert(i == 0);
               /* A zero immediate is left alone (no division by zero
                * at compile time); the RCP then stays as it is.
                */
               if (inst->src[0].imm.f != 0.0f) {
                  scan_inst->opcode = BRW_OPCODE_MOV;
                  scan_inst->src[0] = inst->src[0];
                  scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
                  progress = true;
               }
               break;

            case FS_OPCODE_PULL_CONSTANT_LOAD:
               scan_inst->src[i] = inst->src[0];
               progress = true;
               break;

            default:
               /* Any other opcode keeps reading the GRF. */
               break;
            }
         }

         /* Once the GRF holding the constant is overwritten, later
          * reads no longer see our immediate, so stop scanning.
          */
         if (scan_inst->dst.file == GRF &&
             scan_inst->overwrites_reg(inst->dst)) {
            break;
         }
      }
   }

   if (progress)
       this->live_intervals_valid = false;

   return progress;
}


/**
 * Performs simple algebraic simplifications on the instruction stream.
 *
 * The only rewrite implemented so far is "a * 1.0 = a": a MUL whose
 * second operand is the float immediate 1.0 is turned into a MOV of its
 * first operand.
 *
 * Returns true if any instruction was changed.
 */

bool
fs_visitor::opt_algebraic()
{
   bool progress = false;

   /* NOTE(review): nothing below reads the live interval data directly;
    * confirm whether this call is still required here.
    */
   calculate_live_intervals();

   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      switch (inst->opcode) {
      case BRW_OPCODE_MUL:
         /* Only MULs by an immediate are candidates; the continue moves
          * on to the next instruction of the enclosing loop.
          */
         if (inst->src[1].file != IMM)
            continue;

         /* a * 1.0 = a */
         if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
             inst->src[1].imm.f == 1.0) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[1] = reg_undef;
            progress = true;
            break;
         }

         break;
      default:
         break;
      }
   }

   return progress;
}

/**
 * Must be called after calculate_live_intervals() to remove unused
1459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * writes to registers -- register allocation will fail otherwise 1460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * because something deffed but not used won't be considered to 1461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * interfere with other regs. 1462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::dead_code_eliminate() 1465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool progress = false; 1467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int pc = 0; 1468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org calculate_live_intervals(); 1470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list_safe(node, &this->instructions) { 1472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { 1475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->remove(); 1476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = true; 1477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pc++; 1480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (progress) 1483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
live_intervals_valid = false; 1484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return progress; 1486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 1489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Implements a second type of register coalescing: This one checks if 1490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the two regs involved in a raw move don't interfere, in which case 1491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * they can both by stored in the same place and the MOV removed. 1492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::register_coalesce_2() 1495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool progress = false; 1497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org calculate_live_intervals(); 1499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list_safe(node, &this->instructions) { 1501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->opcode != BRW_OPCODE_MOV || 1504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->predicated || 1505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->saturate || 1506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].file != GRF || 1507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
inst->src[0].negate || 1508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].abs || 1509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].smear != -1 || 1510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.file != GRF || 1511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.type != inst->src[0].type || 1512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_sizes[inst->src[0].reg] != 1 || 1513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual_grf_interferes(inst->dst.reg, inst->src[0].reg)) { 1514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg_from = inst->src[0].reg; 1518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(inst->src[0].reg_offset == 0); 1519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg_to = inst->dst.reg; 1520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int reg_to_offset = inst->dst.reg_offset; 1521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list_safe(node, &this->instructions) { 1523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *scan_inst = (fs_inst *)node; 1524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->dst.file == GRF && 1526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->dst.reg == reg_from) { 1527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->dst.reg = reg_to; 1528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->dst.reg_offset = reg_to_offset; 1529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
1530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 1531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->src[i].file == GRF && 1532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i].reg == reg_from) { 1533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i].reg = reg_to; 1534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i].reg_offset = reg_to_offset; 1535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->remove(); 1540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org live_intervals_valid = false; 1541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = true; 1542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return progress; 1546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::register_coalesce() 1550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool progress = false; 1552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int if_depth = 0; 1553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int loop_depth = 0; 1554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list_safe(node, 
&this->instructions) { 1556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Make sure that we dominate the instructions we're going to 1559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * scan for interfering with our coalescing, or we won't have 1560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * scanned enough to see if anything interferes with our 1561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * coalescing. We don't dominate the following instructions if 1562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * we're in a loop or an if block. 1563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (inst->opcode) { 1565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_DO: 1566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop_depth++; 1567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_WHILE: 1569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loop_depth--; 1570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_IF: 1572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if_depth++; 1573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_ENDIF: 1575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if_depth--; 1576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 1578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 
1579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (loop_depth || if_depth) 1581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->opcode != BRW_OPCODE_MOV || 1584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->predicated || 1585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->saturate || 1586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.file != GRF || (inst->src[0].file != GRF && 1587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].file != UNIFORM)|| 1588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.type != inst->src[0].type) 1589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool has_source_modifiers = inst->src[0].abs || inst->src[0].negate; 1592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Found a move of a GRF to a GRF. Let's see if we can coalesce 1594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * them: check for no writes to either one until the exit of the 1595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * program. 
1596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool interfered = false; 1598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (fs_inst *scan_inst = (fs_inst *)inst->next; 1600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !scan_inst->is_tail_sentinel(); 1601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst = (fs_inst *)scan_inst->next) { 1602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->dst.file == GRF) { 1603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->overwrites_reg(inst->dst) || 1604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->overwrites_reg(inst->src[0])) { 1605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interfered = true; 1606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The gen6 MATH instruction can't handle source modifiers or 1611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * unusual register regions, so avoid coalescing those for 1612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * now. We should do something more specific. 
1613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen >= 6 && 1615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->is_math() && 1616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (has_source_modifiers || inst->src[0].file == UNIFORM)) { 1617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interfered = true; 1618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* The accumulator result appears to get used for the 1622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * conditional modifier generation. When negating a UD 1623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * value, there is a 33rd bit generated for the sign in the 1624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * accumulator value, so now you can't check, for example, 1625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * equality with a 32-bit value. See piglit fs-op-neg-uint. 
1626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->conditional_mod && 1628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].negate && 1629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].type == BRW_REGISTER_TYPE_UD) { 1630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interfered = true; 1631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (interfered) { 1635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Rewrite the later usage to point at the source of the move to 1639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * be removed. 
1640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (fs_inst *scan_inst = inst; 1642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !scan_inst->is_tail_sentinel(); 1643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst = (fs_inst *)scan_inst->next) { 1644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 1645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->src[i].file == GRF && 1646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i].reg == inst->dst.reg && 1647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i].reg_offset == inst->dst.reg_offset) { 1648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg new_src = inst->src[0]; 1649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->src[i].abs) { 1650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org new_src.negate = 0; 1651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org new_src.abs = 1; 1652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org new_src.negate ^= scan_inst->src[i].negate; 1654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i] = new_src; 1655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->remove(); 1660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = true; 1661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (progress) 
1664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org live_intervals_valid = false; 1665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return progress; 1667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::compute_to_mrf() 1672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool progress = false; 1674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int next_ip = 0; 1675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org calculate_live_intervals(); 1677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list_safe(node, &this->instructions) { 1679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int ip = next_ip; 1682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next_ip++; 1683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->opcode != BRW_OPCODE_MOV || 1685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->predicated || 1686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.file != MRF || inst->src[0].file != GRF || 1687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.type != inst->src[0].type || 1688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != 
-1) 1689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Work out which hardware MRF registers are written by this 1692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instruction. 1693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; 1695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int mrf_high; 1696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.reg & BRW_MRF_COMPR4) { 1697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_high = mrf_low + 4; 1698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (c->dispatch_width == 16 && 1699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (!inst->force_uncompressed && !inst->force_sechalf)) { 1700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_high = mrf_low + 1; 1701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_high = mrf_low; 1703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Can't compute-to-MRF this GRF if someone else was going to 1706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * read it later. 1707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->virtual_grf_use[inst->src[0].reg] > ip) 1709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Found a move of a GRF to a MRF. 
Let's see if we can go 1712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * rewrite the thing that made this GRF to write into the MRF. 1713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *scan_inst; 1715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (scan_inst = (fs_inst *)inst->prev; 1716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->prev != NULL; 1717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst = (fs_inst *)scan_inst->prev) { 1718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->dst.file == GRF && 1719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->dst.reg == inst->src[0].reg) { 1720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Found the last thing to write our reg we want to turn 1721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * into a compute-to-MRF. 1722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* SENDs can only write to GRFs, so no compute-to-MRF. */ 1725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->mlen) { 1726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If it's predicated, it (probably) didn't populate all 1730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the channels. We might be able to rewrite everything 1731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * that writes that reg, but it would require smarter 1732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * tracking to delay the rewriting until complete success. 
1733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->predicated) 1735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If it's half of register setup and not the same half as 1738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * our MOV we're trying to remove, bail for now. 1739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->force_uncompressed != inst->force_uncompressed || 1741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->force_sechalf != inst->force_sechalf) { 1742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* SEND instructions can't have MRF as a destination. */ 1746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->mlen) 1747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen >= 6) { 1750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* gen6 math instructions must have the destination be 1751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * GRF, so no compute-to-MRF for them. 
1752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->is_math()) { 1754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { 1759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Found the creator of our MRF's source value. */ 1760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->dst.file = MRF; 1761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->dst.reg = inst->dst.reg; 1762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->saturate |= inst->saturate; 1763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->remove(); 1764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = true; 1765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* We don't handle flow control here. Most computation of 1770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * values that end up in MRFs are shortly before the MRF 1771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * write anyway. 
1772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->opcode == BRW_OPCODE_DO || 1774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->opcode == BRW_OPCODE_WHILE || 1775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->opcode == BRW_OPCODE_ELSE || 1776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->opcode == BRW_OPCODE_ENDIF) { 1777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* You can't read from an MRF, so if someone else reads our 1781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * MRF's source GRF that we wanted to rewrite, that stops us. 1782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool interfered = false; 1784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < 3; i++) { 1785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->src[i].file == GRF && 1786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i].reg == inst->src[0].reg && 1787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { 1788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org interfered = true; 1789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (interfered) 1792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->dst.file == MRF) { 
1795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If somebody else writes our MRF here, we can't 1796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * compute-to-MRF before that. 1797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; 1799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int scan_mrf_high; 1800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->dst.reg & BRW_MRF_COMPR4) { 1802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_mrf_high = scan_mrf_low + 4; 1803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (c->dispatch_width == 16 && 1804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (!scan_inst->force_uncompressed && 1805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !scan_inst->force_sechalf)) { 1806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_mrf_high = scan_mrf_low + 1; 1807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scan_mrf_high = scan_mrf_low; 1809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mrf_low == scan_mrf_low || 1812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_low == scan_mrf_high || 1813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_high == scan_mrf_low || 1814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_high == scan_mrf_high) { 1815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
1818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (scan_inst->mlen > 0) { 1820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Found a SEND instruction, which means that there are 1821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * live values in MRFs from base_mrf to base_mrf + 1822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * scan_inst->mlen - 1. Don't go pushing our MRF write up 1823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * above it. 1824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mrf_low >= scan_inst->base_mrf && 1826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_low < scan_inst->base_mrf + scan_inst->mlen) { 1827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mrf_high >= scan_inst->base_mrf && 1830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mrf_high < scan_inst->base_mrf + scan_inst->mlen) { 1831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (progress) 1838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org live_intervals_valid = false; 1839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return progress; 1841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
1843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 1844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Walks through basic blocks, looking for repeated MRF writes and 1845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * removing the later ones. 1846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::remove_duplicate_mrf_writes() 1849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *last_mrf_move[16]; 1851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool progress = false; 1852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Need to update the MRF tracking for compressed instructions. */ 1854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->dispatch_width == 16) 1855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(last_mrf_move, 0, sizeof(last_mrf_move)); 1858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list_safe(node, &this->instructions) { 1860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *inst = (fs_inst *)node; 1861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (inst->opcode) { 1863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_DO: 1864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_WHILE: 1865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_IF: 1866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 
BRW_OPCODE_ELSE: 1867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case BRW_OPCODE_ENDIF: 1868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(last_mrf_move, 0, sizeof(last_mrf_move)); 1869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 1871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->opcode == BRW_OPCODE_MOV && 1875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.file == MRF) { 1876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; 1877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prev_inst && inst->equals(prev_inst)) { 1878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->remove(); 1879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = true; 1880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Clear out the last-write records for MRFs that were overwritten. 
*/ 1885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == MRF) { 1886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_mrf_move[inst->dst.reg] = NULL; 1887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->mlen > 0) { 1890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Found a SEND instruction, which will include two or fewer 1891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * implied MRF writes. We could do better here. 1892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < implied_mrf_writes(inst); i++) { 1894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_mrf_move[inst->base_mrf + i] = NULL; 1895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Clear out any MRF move records whose sources got overwritten. 
*/ 1899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->dst.file == GRF) { 1900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < Elements(last_mrf_move); i++) { 1901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (last_mrf_move[i] && 1902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_mrf_move[i]->src[0].reg == inst->dst.reg) { 1903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_mrf_move[i] = NULL; 1904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (inst->opcode == BRW_OPCODE_MOV && 1909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->dst.file == MRF && 1910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inst->src[0].file == GRF && 1911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !inst->predicated) { 1912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org last_mrf_move[inst->dst.reg] = inst; 1913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (progress) 1917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org live_intervals_valid = false; 1918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return progress; 1920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 1923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Possibly returns an instruction that set up @param reg. 
1924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Sometimes we want to take the result of some expression/variable 1926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * dereference tree and rewrite the instruction generating the result 1927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the tree. When processing the tree, we know that the 1928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * instructions generated are all writing temporaries that are dead 1929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * outside of this tree. So, if we have some instructions that write 1930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a temporary, we're free to point that temp write somewhere else. 1931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 1932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Note that this doesn't guarantee that the instruction generated 1933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * only reg -- it might be the size=4 destination of a texture instruction. 
1934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_inst * 1936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::get_instruction_generating_reg(fs_inst *start, 1937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_inst *end, 1938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_reg reg) 1939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (end == start || 1941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org end->predicated || 1942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org end->force_uncompressed || 1943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org end->force_sechalf || 1944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !reg.equals(end->dst)) { 1945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 1946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return end; 1948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::run() 1953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t prog_offset_16 = 0; 1955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t orig_nr_params = c->prog_data.nr_params; 1956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw_wm_payload_setup(brw, c); 1958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->dispatch_width == 16) { 
1960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* align to 64 byte boundary. */ 1961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) { 1962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw_NOP(p); 1963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Save off the start of this 16-wide program in case we succeed. */ 1966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prog_offset_16 = c->func.nr_insn * sizeof(struct brw_instruction); 1967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 1969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (0) { 1972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_dummy_fs(); 1973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org calculate_urb_setup(); 1975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen < 6) 1976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_interpolation_setup_gen4(); 1977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 1978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_interpolation_setup_gen6(); 1979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Generate FS IR for main(). (the visitor only descends into 1981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * functions called "main"). 
1982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 1983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foreach_list(node, &*shader->ir) { 1984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ir_instruction *ir = (ir_instruction *)node; 1985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_ir = ir; 1986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->result = reg_undef; 1987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ir->accept(this); 1988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (failed) 1990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org emit_fb_writes(); 1993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org split_virtual_grfs(); 1995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setup_paramvalues_refs(); 1997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setup_pull_constants(); 1998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool progress; 2000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org do { 2001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = false; 2002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = remove_duplicate_mrf_writes() || progress; 2004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = propagate_constants() || progress; 2006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = opt_algebraic() || progress; 
2007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = opt_cse() || progress; 2008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = opt_copy_propagate() || progress; 2009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = register_coalesce() || progress; 2010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = register_coalesce_2() || progress; 2011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = compute_to_mrf() || progress; 2012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org progress = dead_code_eliminate() || progress; 2013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } while (progress); 2014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org remove_dead_constants(); 2016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org schedule_instructions(); 2018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_curb_setup(); 2020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_urb_setup(); 2021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (0) { 2023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Debug of register spilling: Go spill everything. 
*/ 2024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < virtual_grf_count; i++) { 2025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org spill_reg(i); 2026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (0) 2030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assign_regs_trivial(); 2031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 2032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while (!assign_regs()) { 2033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (failed) 2034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 2035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(force_uncompressed_stack == 0); 2039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(force_sechalf_stack == 0); 2040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (failed) 2042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org generate_code(); 2045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (c->dispatch_width == 8) { 2047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.reg_blocks = brw_register_blocks(grf_used); 2048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 2049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.reg_blocks_16 = 
brw_register_blocks(grf_used); 2050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.prog_offset_16 = prog_offset_16; 2051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Make sure we didn't try to sneak in an extra uniform */ 2053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(orig_nr_params == c->prog_data.nr_params); 2054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (void) orig_nr_params; 2055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return !failed; 2058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbrw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, 2062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct gl_shader_program *prog) 2063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct intel_context *intel = &brw->intel; 2065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool start_busy = false; 2066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float start_time = 0; 2067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog) 2069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { 2072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org start_busy = (intel->batch.last_bo && 2073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
drm_intel_bo_busy(intel->batch.last_bo)); 2074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org start_time = get_time(); 2075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_shader *shader = 2078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 2079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!shader) 2080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 2083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org printf("GLSL IR for native fragment shader %d:\n", prog->Name); 2084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org _mesa_print_ir(shader->ir, NULL); 2085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org printf("\n\n"); 2086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Now the main event: Visit the shader IR and generate our FS IR for it. 
2089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 2090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->dispatch_width = 8; 2091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_visitor v(c, prog, shader); 2093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!v.run()) { 2094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prog->LinkStatus = false; 2095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ralloc_strcat(&prog->InfoLog, v.fail_msg); 2096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", 2098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org v.fail_msg); 2099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { 2104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->dispatch_width = 16; 2105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fs_visitor v2(c, prog, shader); 2106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org v2.import_uniforms(&v); 2107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!v2.run()) { 2108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org perf_debug("16-wide shader failed to compile, falling back to " 2109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "8-wide at a 10-20%% performance cost: %s", v2.fail_msg); 2110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
2113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org c->prog_data.dispatch_width = 8; 2114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { 2116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (shader->compiled_once) 2117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw_wm_debug_recompile(brw, prog, &c->key); 2118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shader->compiled_once = true; 2119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) { 2121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org perf_debug("FS compile took %.03f ms and stalled the GPU\n", 2122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (get_time() - start_time) * 1000); 2123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbrw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) 2131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_context *brw = brw_context(ctx); 2133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct intel_context *intel = &brw->intel; 2134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_wm_prog_key key; 2135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
2136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) 2137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct gl_fragment_program *fp = (struct gl_fragment_program *) 2140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; 2141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_fragment_program *bfp = brw_fragment_program(fp); 2142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool program_uses_dfdy = fp->UsesDFdy; 2143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(&key, 0, sizeof(key)); 2145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen < 6) { 2147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fp->UsesKill) 2148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; 2149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) 2151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; 2152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Just assume depth testing. 
*/ 2154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; 2155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; 2156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prog->Name != 0) 2159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.proj_attrib_mask = 0xffffffff; 2160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen < 6) 2162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.vp_outputs_written |= BITFIELD64_BIT(FRAG_ATTRIB_WPOS); 2163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!(fp->Base.InputsRead & BITFIELD64_BIT(i))) 2166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 2167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prog->Name == 0) 2169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.proj_attrib_mask |= 1 << i; 2170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (intel->gen < 6) { 2172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int vp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i); 2173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (vp_index >= 0) 2175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.vp_outputs_written |= BITFIELD64_BIT(vp_index); 2176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
2177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.clamp_fragment_color = true; 2180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < MAX_SAMPLERS; i++) { 2182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fp->Base.ShadowSamplers & (1 << i)) { 2183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */ 2184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.tex.swizzles[i] = 2185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE); 2186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 2187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Color sampler: assume no swizzling. */ 2188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.tex.swizzles[i] = SWIZZLE_XYZW; 2189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fp->Base.InputsRead & FRAG_BIT_WPOS) { 2193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.drawable_height = ctx->DrawBuffer->Height; 2194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((fp->Base.InputsRead & FRAG_BIT_WPOS) || program_uses_dfdy) { 2197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); 2198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
2200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.nr_color_regions = 1; 2201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org key.program_string_id = bfp->id; 2203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t old_prog_offset = brw->wm.prog_offset; 2205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; 2206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool success = do_wm_prog(brw, prog, bfp, &key); 2208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.prog_offset = old_prog_offset; 2210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org brw->wm.prog_data = old_prog_data; 2211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return success; 2213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2214