142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt/* 242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * Copyright © 2011 Intel Corporation 342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * 442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * Permission is hereby granted, free of charge, to any person obtaining a 542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * copy of this software and associated documentation files (the "Software"), 642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * to deal in the Software without restriction, including without limitation 742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * the rights to use, copy, modify, merge, publish, distribute, sublicense, 842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * and/or sell copies of the Software, and to permit persons to whom the 942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * Software is furnished to do so, subject to the following conditions: 1042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * 1142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * The above copyright notice and this permission notice (including the next 1242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * paragraph) shall be included in all copies or substantial portions of the 1342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * Software. 1442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * 1542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * IN THE SOFTWARE. 2242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 2342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 2442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt/** 2542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * @file brw_vec4_copy_propagation.cpp 2642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * 2742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * Implements tracking of values copied between registers, and 2842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * optimizations based on that: copy propagation and constant 2942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * propagation. 3042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 3142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 3242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt#include "brw_vec4.h" 3342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtextern "C" { 3442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt#include "main/macros.h" 3542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt} 3642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 3742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtnamespace brw { 3842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 3942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtstatic bool 4042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtis_direct_copy(vec4_instruction *inst) 4142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{ 4242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return (inst->opcode == BRW_OPCODE_MOV && 4342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt !inst->predicate && 4442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->dst.file == GRF && 4542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt !inst->saturate && 4642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt !inst->dst.reladdr && 4742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt !inst->src[0].reladdr && 4842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->dst.type == inst->src[0].type); 4942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt} 5042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 5142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtstatic bool 5242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtis_dominated_by_previous_instruction(vec4_instruction *inst) 5342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{ 5442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return (inst->opcode != BRW_OPCODE_DO && 5542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->opcode != BRW_OPCODE_WHILE && 5642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->opcode != BRW_OPCODE_ELSE && 5742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->opcode != BRW_OPCODE_ENDIF); 5842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt} 5942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 6042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtstatic bool 6142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholttry_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4]) 6242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{ 6342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* For constant propagation, we only handle the same constant 6442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * across all 4 channels. Some day, we should handle the 8-bit 6542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * float vector format, which would let us constant propagate 6642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * vectors better. 6742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 6842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt src_reg value = *values[0]; 6942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt for (int i = 1; i < 4; i++) { 7042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (!value.equals(values[i])) 7142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return false; 7242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 7342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 7442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (value.file != IMM) 7542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return false; 7642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 7742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (inst->src[arg].abs) { 7842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (value.type == BRW_REGISTER_TYPE_F) { 7942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt value.imm.f = fabs(value.imm.f); 8042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } else if (value.type == BRW_REGISTER_TYPE_D) { 8142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (value.imm.i < 0) 8242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt value.imm.i = -value.imm.i; 8342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 8442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 8542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 8642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (inst->src[arg].negate) { 8742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (value.type == BRW_REGISTER_TYPE_F) 8842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt value.imm.f = -value.imm.f; 8942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt else 9042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt value.imm.u = -value.imm.u; 9142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 9242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 9342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt switch (inst->opcode) { 9442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt case BRW_OPCODE_MOV: 9542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[arg] = value; 9642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return true; 9742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 9842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt case BRW_OPCODE_MUL: 9942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt case BRW_OPCODE_ADD: 10042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (arg == 1) { 10142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[arg] = value; 10242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return true; 10342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } else if (arg == 0 && inst->src[1].file != IMM) { 1048fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry /* Fit this constant in by commuting the operands. Exception: we 1058fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry * can't do this for 32-bit integer MUL because it's asymmetric. 1068fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry */ 1078fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry if (inst->opcode == BRW_OPCODE_MUL && 1088fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry (inst->src[1].type == BRW_REGISTER_TYPE_D || 1098fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry inst->src[1].type == BRW_REGISTER_TYPE_UD)) 1108fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry break; 11142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[0] = inst->src[1]; 11242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[1] = value; 11342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return true; 11442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 11542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt break; 11642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 11742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt case BRW_OPCODE_CMP: 11842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (arg == 1) { 11942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[arg] = value; 12042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return true; 12142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } else if (arg == 0 && inst->src[1].file != IMM) { 12242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt uint32_t new_cmod; 12342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 12442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt new_cmod = brw_swap_cmod(inst->conditional_mod); 12542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (new_cmod != ~0u) { 12642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* Fit this constant in by swapping the operands and 12742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * flipping the test. 12842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 12942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[0] = inst->src[1]; 13042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[1] = value; 13142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->conditional_mod = new_cmod; 13242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return true; 13342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 13442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 13542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt break; 13642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 13742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt case BRW_OPCODE_SEL: 13842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (arg == 1) { 13942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[arg] = value; 14042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return true; 14142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } else if (arg == 0 && inst->src[1].file != IMM) { 14242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[0] = inst->src[1]; 14342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[1] = value; 14442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 14542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* If this was predicated, flipping operands means 14642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * we also need to flip the predicate. 14742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 14842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (inst->conditional_mod == BRW_CONDITIONAL_NONE) { 14942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->predicate_inverse = !inst->predicate_inverse; 15042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 15142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return true; 15242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 15342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt break; 15442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 15542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt default: 15642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt break; 15742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 15842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 15942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return false; 16042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt} 16142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 162cc9eb936c220267b6130b705fc696d05906a31dfEric Anholtstatic bool 163cc9eb936c220267b6130b705fc696d05906a31dfEric Anholttry_copy_propagation(struct intel_context *intel, 164cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt vec4_instruction *inst, int arg, src_reg *values[4]) 165cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt{ 166cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt /* For constant propagation, we only handle the same constant 167cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt * across all 4 channels. Some day, we should handle the 8-bit 168cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt * float vector format, which would let us constant propagate 169cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt * vectors better. 170cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt */ 171cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt src_reg value = *values[0]; 172cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt for (int i = 1; i < 4; i++) { 173cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt /* This is equals() except we don't care about the swizzle. */ 174cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt if (value.file != values[i]->file || 175cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.reg != values[i]->reg || 176cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.reg_offset != values[i]->reg_offset || 177cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.type != values[i]->type || 178cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.negate != values[i]->negate || 179cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.abs != values[i]->abs) { 180cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt return false; 181cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt } 182cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt } 183cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt 184cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt /* Compute the swizzle of the original register by swizzling the 185cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt * component loaded from each value according to the swizzle of 186cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt * operand we're going to change. 187cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt */ 188cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt int s[4]; 189cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt for (int i = 0; i < 4; i++) { 190cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt s[i] = BRW_GET_SWZ(values[i]->swizzle, 191cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt BRW_GET_SWZ(inst->src[arg].swizzle, i)); 192cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt } 193cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]); 194cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt 195cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt if (value.file != UNIFORM && 19629361e14df8e5e92df747d52303da2c454e2caccEric Anholt value.file != GRF && 197cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.file != ATTR) 198cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt return false; 199cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt 200cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt if (inst->src[arg].abs) { 201cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.negate = false; 202cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt value.abs = true; 203cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt } 204cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt if (inst->src[arg].negate) 20520da49b2677fcf6a721b8a46e037a01e842fee61Paul Berry value.negate = !value.negate; 206cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt 207cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt /* FINISHME: We can't copy-propagate things that aren't normal 208cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt * vec8s into gen6 math instructions, because of the weird src 209cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt * handling for those instructions. Just ignore them for now. 210cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt */ 211cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt if (intel->gen >= 6 && inst->is_math()) 212cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt return false; 213cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt 21431874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt /* We can't copy-propagate a UD negation into a condmod 21531874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt * instruction, because the condmod ends up looking at the 33-bit 21631874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt * signed accumulator value instead of the 32-bit value we wanted 21731874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt */ 21831874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt if (inst->conditional_mod && 21931874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt value.negate && 22031874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt value.type == BRW_REGISTER_TYPE_UD) 22131874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt return false; 22231874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt 22329361e14df8e5e92df747d52303da2c454e2caccEric Anholt /* Don't report progress if this is a noop. */ 22429361e14df8e5e92df747d52303da2c454e2caccEric Anholt if (value.equals(&inst->src[arg])) 22529361e14df8e5e92df747d52303da2c454e2caccEric Anholt return false; 22629361e14df8e5e92df747d52303da2c454e2caccEric Anholt 22705974cb8c6cce68344b5622c5cb55adf3e51164cKenneth Graunke value.type = inst->src[arg].type; 228cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt inst->src[arg] = value; 229cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt return true; 230cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt} 231cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt 23242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtbool 23342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtvec4_visitor::opt_copy_propagation() 23442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{ 23542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt bool progress = false; 23642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt src_reg *cur_value[virtual_grf_reg_count][4]; 23742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 23842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt memset(&cur_value, 0, sizeof(cur_value)); 23942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 24042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt foreach_list(node, &this->instructions) { 24142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt vec4_instruction *inst = (vec4_instruction *)node; 24242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 24342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* This pass only works on basic blocks. If there's flow 24442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * control, throw out all our information and start from 24542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * scratch. 24642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * 24742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * This should really be fixed by using a structure like in 24842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * src/glsl/opt_copy_propagation.cpp to track available copies. 24942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 25042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (!is_dominated_by_previous_instruction(inst)) { 25142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt memset(cur_value, 0, sizeof(cur_value)); 25242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt continue; 25342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 25442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 25542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* For each source arg, see if each component comes from a copy 25642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * from the same type file (IMM, GRF, UNIFORM), and try 25742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * optimizing out access to the copy result 25842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 25942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt for (int i = 2; i >= 0; i--) { 26042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* Copied values end up in GRFs, and we don't track reladdr 26142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * accesses. 26242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 26342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (inst->src[i].file != GRF || 26442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt inst->src[i].reladdr) 26542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt continue; 26642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 26739790b6450b770cd402bc08b9416cab67de3dedbEric Anholt int reg = (virtual_grf_reg_map[inst->src[i].reg] + 26839790b6450b770cd402bc08b9416cab67de3dedbEric Anholt inst->src[i].reg_offset); 26939790b6450b770cd402bc08b9416cab67de3dedbEric Anholt 27042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* Find the regs that each swizzle component came from. 27142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 27242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt src_reg *values[4]; 27342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt int c; 27442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt for (c = 0; c < 4; c++) { 27542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)]; 27642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 27742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* If there's no available copy for this channel, bail. 27842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * We could be more aggressive here -- some channels might 27942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * not get used based on the destination writemask. 28042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 28142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (!values[c]) 28242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt break; 28342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 28442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* We'll only be able to copy propagate if the sources are 28542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * all from the same file -- there's no ability to swizzle 28642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt * 0 or 1 constants in with source registers like in i915. 28742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt */ 28842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (c > 0 && values[c - 1]->file != values[c]->file) 28942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt break; 29042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 29142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 29242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (c != 4) 29342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt continue; 29442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 295cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt if (try_constant_propagation(inst, i, values) || 296cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt try_copy_propagation(intel, inst, i, values)) 29742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt progress = true; 29842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 29942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 30042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt /* Track available source registers. */ 3017ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke if (inst->dst.file == GRF) { 3027ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke const int reg = 3037ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset; 30407ee9f374f2946f852896e9264c7fa83eafc3f16Kenneth Graunke 3057ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke /* Update our destination's current channel values. For a direct copy, 3067ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke * the value is the newly propagated source. Otherwise, we don't know 3077ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke * the new value, so clear it. 3087ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke */ 3097ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke bool direct_copy = is_direct_copy(inst); 3107ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke for (int i = 0; i < 4; i++) { 3117ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke if (inst->dst.writemask & (1 << i)) { 3127ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL; 3137ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke } 31442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 31542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 3167ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke /* Clear the records for any registers whose current value came from 3177ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke * our destination's updated channels, as the two are no longer equal. 3187ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke */ 31942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (inst->dst.reladdr) 32042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt memset(cur_value, 0, sizeof(cur_value)); 32142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt else { 3226d0c018776977219e355c534eaafec53a30d993bEric Anholt for (int i = 0; i < virtual_grf_reg_count; i++) { 3236d0c018776977219e355c534eaafec53a30d993bEric Anholt for (int j = 0; j < 4; j++) { 324443c8d1ab7ddad9392046e041e4e9a4fda7cd6e7Kenneth Graunke if (inst->dst.writemask & (1 << j) && 3256d0c018776977219e355c534eaafec53a30d993bEric Anholt cur_value[i][j] && 3266d0c018776977219e355c534eaafec53a30d993bEric Anholt cur_value[i][j]->file == GRF && 3276d0c018776977219e355c534eaafec53a30d993bEric Anholt cur_value[i][j]->reg == inst->dst.reg && 32829361e14df8e5e92df747d52303da2c454e2caccEric Anholt cur_value[i][j]->reg_offset == inst->dst.reg_offset) { 3296d0c018776977219e355c534eaafec53a30d993bEric Anholt cur_value[i][j] = NULL; 3306d0c018776977219e355c534eaafec53a30d993bEric Anholt } 3316d0c018776977219e355c534eaafec53a30d993bEric Anholt } 3326d0c018776977219e355c534eaafec53a30d993bEric Anholt } 33342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 33442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 33542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt } 33642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 33742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt if (progress) 33842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt live_intervals_valid = false; 33942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 34042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt return progress; 34142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt} 34242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt 34342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt} /* namespace brw */ 344