1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/*
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2012 Intel Corporation
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"),
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions:
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software.
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN THE SOFTWARE.
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_fs.h"
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "brw_fs_cfg.h"
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace { /* avoid conflict with opt_copy_propagation_elements */
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct acp_entry : public exec_node {
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fs_reg dst;
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fs_reg src;
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (inst->src[arg].file != entry->dst.file ||
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       inst->src[arg].reg != entry->dst.reg ||
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       inst->src[arg].reg_offset != entry->dst.reg_offset) {
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (inst->conditional_mod &&
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       inst->src[arg].type == BRW_REGISTER_TYPE_UD &&
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       entry->src.negate)
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool has_source_modifiers = entry->src.abs || entry->src.negate;
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (intel->gen == 6 && inst->is_math() &&
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       (has_source_modifiers || entry->src.file == UNIFORM))
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inst->src[arg].file = entry->src.file;
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inst->src[arg].reg = entry->src.reg;
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   inst->src[arg].reg_offset = entry->src.reg_offset;
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!inst->src[arg].abs) {
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inst->src[arg].abs = entry->src.abs;
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      inst->src[arg].negate ^= entry->src.negate;
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/** @file brw_fs_copy_propagation.cpp
 *
 * Support for local copy propagation by walking the list of instructions
 * and maintaining the ACP table of available copies for propagation.
 *
 * See Muchnick's Advanced Compiler Design and Implementation, section
 * 12.5 (p356).
 */
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* Walks a basic block and does copy propagation on it using the acp
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * list.
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::opt_copy_propagate_local(void *mem_ctx,
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				     fs_bblock *block, exec_list *acp)
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool progress = false;
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (fs_inst *inst = block->start;
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst != block->end->next;
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	inst = (fs_inst *)inst->next) {
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* Try propagating into this instruction. */
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      foreach_list(entry_node, acp) {
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 acp_entry *entry = (acp_entry *)entry_node;
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 for (int i = 0; i < 3; i++) {
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    if (try_copy_propagate(inst, i, entry))
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	       progress = true;
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* kill the destination from the ACP */
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (inst->dst.file == GRF) {
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 foreach_list_safe(entry_node, acp) {
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    acp_entry *entry = (acp_entry *)entry_node;
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    if (inst->overwrites_reg(entry->dst) ||
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                inst->overwrites_reg(entry->src)) {
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	       entry->remove();
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    }
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* If this instruction is a raw copy, add it to the ACP. */
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (inst->opcode == BRW_OPCODE_MOV &&
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  inst->dst.file == GRF &&
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  ((inst->src[0].file == GRF &&
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    (inst->src[0].reg != inst->dst.reg ||
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	     inst->src[0].reg_offset != inst->dst.reg_offset)) ||
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	   inst->src[0].file == UNIFORM) &&
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  inst->src[0].type == inst->dst.type &&
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  !inst->saturate &&
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  !inst->predicated &&
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  !inst->force_uncompressed &&
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  !inst->force_sechalf &&
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	  inst->src[0].smear == -1) {
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 acp_entry *entry = ralloc(mem_ctx, acp_entry);
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 entry->dst = inst->dst;
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 entry->src = inst->src[0];
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 acp->push_tail(entry);
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return progress;
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgfs_visitor::opt_copy_propagate()
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool progress = false;
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void *mem_ctx = ralloc_context(this->mem_ctx);
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   fs_cfg cfg(this);
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (int b = 0; b < cfg.num_blocks; b++) {
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      fs_bblock *block = cfg.blocks[b];
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      exec_list acp;
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      progress = opt_copy_propagate_local(mem_ctx, block, &acp) || progress;
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ralloc_free(mem_ctx);
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (progress)
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      live_intervals_valid = false;
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return progress;
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
156