/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
2342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
/**
 * @file brw_vec4_copy_propagation.cpp
 *
 * Implements tracking of values copied between registers, and
 * optimizations based on that: copy propagation and constant
 * propagation.
 */
3142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
3242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt#include "brw_vec4.h"
3342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtextern "C" {
3442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt#include "main/macros.h"
3542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt}
3642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
3742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtnamespace brw {
3842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
3942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtstatic bool
4042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtis_direct_copy(vec4_instruction *inst)
4142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{
4242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   return (inst->opcode == BRW_OPCODE_MOV &&
4342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   !inst->predicate &&
4442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   inst->dst.file == GRF &&
4542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   !inst->saturate &&
4642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   !inst->dst.reladdr &&
4742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   !inst->src[0].reladdr &&
4842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   inst->dst.type == inst->src[0].type);
4942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt}
5042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
5142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtstatic bool
5242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtis_dominated_by_previous_instruction(vec4_instruction *inst)
5342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{
5442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   return (inst->opcode != BRW_OPCODE_DO &&
5542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   inst->opcode != BRW_OPCODE_WHILE &&
5642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   inst->opcode != BRW_OPCODE_ELSE &&
5742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	   inst->opcode != BRW_OPCODE_ENDIF);
5842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt}
5942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
6042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtstatic bool
6142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholttry_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
6242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{
6342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   /* For constant propagation, we only handle the same constant
6442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt    * across all 4 channels.  Some day, we should handle the 8-bit
6542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt    * float vector format, which would let us constant propagate
6642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt    * vectors better.
6742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt    */
6842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   src_reg value = *values[0];
6942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   for (int i = 1; i < 4; i++) {
7042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      if (!value.equals(values[i]))
7142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 return false;
7242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   }
7342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
7442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   if (value.file != IMM)
7542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      return false;
7642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
7742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   if (inst->src[arg].abs) {
7842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      if (value.type == BRW_REGISTER_TYPE_F) {
7942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 value.imm.f = fabs(value.imm.f);
8042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      } else if (value.type == BRW_REGISTER_TYPE_D) {
8142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 if (value.imm.i < 0)
8242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    value.imm.i = -value.imm.i;
8342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      }
8442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   }
8542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
8642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   if (inst->src[arg].negate) {
8742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      if (value.type == BRW_REGISTER_TYPE_F)
8842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 value.imm.f = -value.imm.f;
8942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      else
9042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 value.imm.u = -value.imm.u;
9142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   }
9242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
9342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   switch (inst->opcode) {
9442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   case BRW_OPCODE_MOV:
9542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      inst->src[arg] = value;
9642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      return true;
9742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
9842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   case BRW_OPCODE_MUL:
9942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   case BRW_OPCODE_ADD:
10042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      if (arg == 1) {
10142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 inst->src[arg] = value;
10242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 return true;
10342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      } else if (arg == 0 && inst->src[1].file != IMM) {
1048fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry	 /* Fit this constant in by commuting the operands.  Exception: we
1058fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry	  * can't do this for 32-bit integer MUL because it's asymmetric.
1068fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry	  */
1078fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry	 if (inst->opcode == BRW_OPCODE_MUL &&
1088fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry	     (inst->src[1].type == BRW_REGISTER_TYPE_D ||
1098fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry	      inst->src[1].type == BRW_REGISTER_TYPE_UD))
1108fad0f99989866eeb72889a84f12f6f817334ddbPaul Berry	    break;
11142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 inst->src[0] = inst->src[1];
11242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 inst->src[1] = value;
11342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 return true;
11442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      }
11542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      break;
11642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
11742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   case BRW_OPCODE_CMP:
11842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      if (arg == 1) {
11942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 inst->src[arg] = value;
12042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 return true;
12142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      } else if (arg == 0 && inst->src[1].file != IMM) {
12242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 uint32_t new_cmod;
12342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
12442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 new_cmod = brw_swap_cmod(inst->conditional_mod);
12542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 if (new_cmod != ~0u) {
12642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    /* Fit this constant in by swapping the operands and
12742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     * flipping the test.
12842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     */
12942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    inst->src[0] = inst->src[1];
13042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    inst->src[1] = value;
13142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    inst->conditional_mod = new_cmod;
13242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    return true;
13342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 }
13442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      }
13542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      break;
13642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
13742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   case BRW_OPCODE_SEL:
13842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      if (arg == 1) {
13942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 inst->src[arg] = value;
14042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 return true;
14142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      } else if (arg == 0 && inst->src[1].file != IMM) {
14242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 inst->src[0] = inst->src[1];
14342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 inst->src[1] = value;
14442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
14542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 /* If this was predicated, flipping operands means
14642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	  * we also need to flip the predicate.
14742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	  */
14842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
14942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    inst->predicate_inverse = !inst->predicate_inverse;
15042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 }
15142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 return true;
15242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      }
15342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      break;
15442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
15542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   default:
15642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      break;
15742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   }
15842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
15942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   return false;
16042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt}
16142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
162cc9eb936c220267b6130b705fc696d05906a31dfEric Anholtstatic bool
163cc9eb936c220267b6130b705fc696d05906a31dfEric Anholttry_copy_propagation(struct intel_context *intel,
164cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt		     vec4_instruction *inst, int arg, src_reg *values[4])
165cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt{
166cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   /* For constant propagation, we only handle the same constant
167cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    * across all 4 channels.  Some day, we should handle the 8-bit
168cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    * float vector format, which would let us constant propagate
169cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    * vectors better.
170cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    */
171cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   src_reg value = *values[0];
172cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   for (int i = 1; i < 4; i++) {
173cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      /* This is equals() except we don't care about the swizzle. */
174cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      if (value.file != values[i]->file ||
175cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	  value.reg != values[i]->reg ||
176cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	  value.reg_offset != values[i]->reg_offset ||
177cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	  value.type != values[i]->type ||
178cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	  value.negate != values[i]->negate ||
179cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	  value.abs != values[i]->abs) {
180cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	 return false;
181cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      }
182cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   }
183cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt
184cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   /* Compute the swizzle of the original register by swizzling the
185cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    * component loaded from each value according to the swizzle of
186cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    * operand we're going to change.
187cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    */
188cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   int s[4];
189cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   for (int i = 0; i < 4; i++) {
190cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      s[i] = BRW_GET_SWZ(values[i]->swizzle,
191cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt			 BRW_GET_SWZ(inst->src[arg].swizzle, i));
192cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   }
193cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
194cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt
195cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   if (value.file != UNIFORM &&
19629361e14df8e5e92df747d52303da2c454e2caccEric Anholt       value.file != GRF &&
197cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt       value.file != ATTR)
198cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      return false;
199cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt
200cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   if (inst->src[arg].abs) {
201cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      value.negate = false;
202cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      value.abs = true;
203cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   }
204cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   if (inst->src[arg].negate)
20520da49b2677fcf6a721b8a46e037a01e842fee61Paul Berry      value.negate = !value.negate;
206cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt
207cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   /* FINISHME: We can't copy-propagate things that aren't normal
208cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    * vec8s into gen6 math instructions, because of the weird src
209cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    * handling for those instructions.  Just ignore them for now.
210cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt    */
211cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   if (intel->gen >= 6 && inst->is_math())
212cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt      return false;
213cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt
21431874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt   /* We can't copy-propagate a UD negation into a condmod
21531874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt    * instruction, because the condmod ends up looking at the 33-bit
21631874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt    * signed accumulator value instead of the 32-bit value we wanted
21731874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt    */
21831874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt   if (inst->conditional_mod &&
21931874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt       value.negate &&
22031874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt       value.type == BRW_REGISTER_TYPE_UD)
22131874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt      return false;
22231874f074c2eaf2a9421c57f0798c79078d296c4Eric Anholt
22329361e14df8e5e92df747d52303da2c454e2caccEric Anholt   /* Don't report progress if this is a noop. */
22429361e14df8e5e92df747d52303da2c454e2caccEric Anholt   if (value.equals(&inst->src[arg]))
22529361e14df8e5e92df747d52303da2c454e2caccEric Anholt      return false;
22629361e14df8e5e92df747d52303da2c454e2caccEric Anholt
22705974cb8c6cce68344b5622c5cb55adf3e51164cKenneth Graunke   value.type = inst->src[arg].type;
228cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   inst->src[arg] = value;
229cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt   return true;
230cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt}
231cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt
23242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtbool
23342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholtvec4_visitor::opt_copy_propagation()
23442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt{
23542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   bool progress = false;
23642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   src_reg *cur_value[virtual_grf_reg_count][4];
23742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
23842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   memset(&cur_value, 0, sizeof(cur_value));
23942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
24042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   foreach_list(node, &this->instructions) {
24142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      vec4_instruction *inst = (vec4_instruction *)node;
24242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
24342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      /* This pass only works on basic blocks.  If there's flow
24442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       * control, throw out all our information and start from
24542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       * scratch.
24642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       *
24742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       * This should really be fixed by using a structure like in
24842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       * src/glsl/opt_copy_propagation.cpp to track available copies.
24942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       */
25042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      if (!is_dominated_by_previous_instruction(inst)) {
25142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 memset(cur_value, 0, sizeof(cur_value));
25242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 continue;
25342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      }
25442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
25542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      /* For each source arg, see if each component comes from a copy
25642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       * from the same type file (IMM, GRF, UNIFORM), and try
25742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       * optimizing out access to the copy result
25842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt       */
25942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      for (int i = 2; i >= 0; i--) {
26042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 /* Copied values end up in GRFs, and we don't track reladdr
26142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	  * accesses.
26242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	  */
26342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 if (inst->src[i].file != GRF ||
26442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     inst->src[i].reladdr)
26542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    continue;
26642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
26739790b6450b770cd402bc08b9416cab67de3dedbEric Anholt	 int reg = (virtual_grf_reg_map[inst->src[i].reg] +
26839790b6450b770cd402bc08b9416cab67de3dedbEric Anholt		    inst->src[i].reg_offset);
26939790b6450b770cd402bc08b9416cab67de3dedbEric Anholt
27042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 /* Find the regs that each swizzle component came from.
27142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	  */
27242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 src_reg *values[4];
27342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 int c;
27442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 for (c = 0; c < 4; c++) {
27542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
27642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
27742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    /* If there's no available copy for this channel, bail.
27842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     * We could be more aggressive here -- some channels might
27942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     * not get used based on the destination writemask.
28042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     */
28142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    if (!values[c])
28242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	       break;
28342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
28442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    /* We'll only be able to copy propagate if the sources are
28542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     * all from the same file -- there's no ability to swizzle
28642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     * 0 or 1 constants in with source registers like in i915.
28742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	     */
28842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    if (c > 0 && values[c - 1]->file != values[c]->file)
28942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	       break;
29042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 }
29142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
29242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 if (c != 4)
29342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    continue;
29442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
295cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	 if (try_constant_propagation(inst, i, values) ||
296cc9eb936c220267b6130b705fc696d05906a31dfEric Anholt	     try_copy_propagation(intel, inst, i, values))
29742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    progress = true;
29842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      }
29942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
30042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      /* Track available source registers. */
3017ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke      if (inst->dst.file == GRF) {
3027ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	 const int reg =
3037ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	    virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
30407ee9f374f2946f852896e9264c7fa83eafc3f16Kenneth Graunke
3057ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	 /* Update our destination's current channel values.  For a direct copy,
3067ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	  * the value is the newly propagated source.  Otherwise, we don't know
3077ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	  * the new value, so clear it.
3087ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	  */
3097ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	 bool direct_copy = is_direct_copy(inst);
3107ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	 for (int i = 0; i < 4; i++) {
3117ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	    if (inst->dst.writemask & (1 << i)) {
3127ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	       cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL;
3137ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	    }
31442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 }
31542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
3167ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	 /* Clear the records for any registers whose current value came from
3177ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	  * our destination's updated channels, as the two are no longer equal.
3187ccf04ebcf6284a68ab6c571dabc5f3e0c6b740bKenneth Graunke	  */
31942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 if (inst->dst.reladdr)
32042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	    memset(cur_value, 0, sizeof(cur_value));
32142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 else {
3226d0c018776977219e355c534eaafec53a30d993bEric Anholt	    for (int i = 0; i < virtual_grf_reg_count; i++) {
3236d0c018776977219e355c534eaafec53a30d993bEric Anholt	       for (int j = 0; j < 4; j++) {
324443c8d1ab7ddad9392046e041e4e9a4fda7cd6e7Kenneth Graunke		  if (inst->dst.writemask & (1 << j) &&
3256d0c018776977219e355c534eaafec53a30d993bEric Anholt		      cur_value[i][j] &&
3266d0c018776977219e355c534eaafec53a30d993bEric Anholt		      cur_value[i][j]->file == GRF &&
3276d0c018776977219e355c534eaafec53a30d993bEric Anholt		      cur_value[i][j]->reg == inst->dst.reg &&
32829361e14df8e5e92df747d52303da2c454e2caccEric Anholt		      cur_value[i][j]->reg_offset == inst->dst.reg_offset) {
3296d0c018776977219e355c534eaafec53a30d993bEric Anholt		     cur_value[i][j] = NULL;
3306d0c018776977219e355c534eaafec53a30d993bEric Anholt		  }
3316d0c018776977219e355c534eaafec53a30d993bEric Anholt	       }
3326d0c018776977219e355c534eaafec53a30d993bEric Anholt	    }
33342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt	 }
33442ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      }
33542ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   }
33642ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
33742ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   if (progress)
33842ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt      live_intervals_valid = false;
33942ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
34042ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt   return progress;
34142ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt}
34242ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt
34342ce13195b94d0d51ca8e7fa5eed07fde8f37988Eric Anholt} /* namespace brw */
344