1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_fs.h"
25#include "brw_fs_cfg.h"
26
27/** @file brw_fs_cse.cpp
28 *
29 * Support for local common subexpression elimination.
30 *
31 * See Muchnik's Advanced Compiler Design and Implementation, section
32 * 13.1 (p378).
33 */
34
35namespace {
36struct aeb_entry : public exec_node {
37   /** The instruction that generates the expression value. */
38   fs_inst *generator;
39
40   /** The temporary where the value is stored. */
41   fs_reg tmp;
42};
43}
44
45static bool
46is_expression(const fs_inst *const inst)
47{
48   switch (inst->opcode) {
49   case BRW_OPCODE_SEL:
50   case BRW_OPCODE_NOT:
51   case BRW_OPCODE_AND:
52   case BRW_OPCODE_OR:
53   case BRW_OPCODE_XOR:
54   case BRW_OPCODE_SHR:
55   case BRW_OPCODE_SHL:
56   case BRW_OPCODE_RSR:
57   case BRW_OPCODE_RSL:
58   case BRW_OPCODE_ASR:
59   case BRW_OPCODE_ADD:
60   case BRW_OPCODE_MUL:
61   case BRW_OPCODE_FRC:
62   case BRW_OPCODE_RNDU:
63   case BRW_OPCODE_RNDD:
64   case BRW_OPCODE_RNDE:
65   case BRW_OPCODE_RNDZ:
66   case BRW_OPCODE_LINE:
67   case BRW_OPCODE_PLN:
68   case BRW_OPCODE_MAD:
69   case FS_OPCODE_CINTERP:
70   case FS_OPCODE_LINTERP:
71      return true;
72   default:
73      return false;
74   }
75}
76
77static bool
78operands_match(fs_reg *xs, fs_reg *ys)
79{
80   return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
81}
82
83bool
84fs_visitor::opt_cse_local(fs_bblock *block, exec_list *aeb)
85{
86   bool progress = false;
87
88   void *mem_ctx = ralloc_context(this->mem_ctx);
89
90   for (fs_inst *inst = block->start;
91	inst != block->end->next;
92	inst = (fs_inst *) inst->next) {
93
94      /* Skip some cases. */
95      if (is_expression(inst) && !inst->predicated && inst->mlen == 0 &&
96          !inst->force_uncompressed && !inst->force_sechalf &&
97          !inst->conditional_mod)
98      {
99	 bool found = false;
100
101	 aeb_entry *entry;
102	 foreach_list(entry_node, aeb) {
103	    entry = (aeb_entry *) entry_node;
104
105	    /* Match current instruction's expression against those in AEB. */
106	    if (inst->opcode == entry->generator->opcode &&
107		inst->saturate == entry->generator->saturate &&
108		operands_match(entry->generator->src, inst->src)) {
109
110	       found = true;
111	       progress = true;
112	       break;
113	    }
114	 }
115
116	 if (!found) {
117	    /* Our first sighting of this expression.  Create an entry. */
118	    aeb_entry *entry = ralloc(mem_ctx, aeb_entry);
119	    entry->tmp = reg_undef;
120	    entry->generator = inst;
121	    aeb->push_tail(entry);
122	 } else {
123	    /* This is at least our second sighting of this expression.
124	     * If we don't have a temporary already, make one.
125	     */
126	    bool no_existing_temp = entry->tmp.file == BAD_FILE;
127	    if (no_existing_temp) {
128	       entry->tmp = fs_reg(this, glsl_type::float_type);
129	       entry->tmp.type = inst->dst.type;
130
131	       fs_inst *copy = new(ralloc_parent(inst))
132		  fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
133	       entry->generator->insert_after(copy);
134	       entry->generator->dst = entry->tmp;
135	    }
136
137	    /* dest <- temp */
138	    fs_inst *copy = new(ralloc_parent(inst))
139	       fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
140	    inst->replace_with(copy);
141
142	    /* Appending an instruction may have changed our bblock end. */
143	    if (inst == block->end) {
144	       block->end = copy;
145	    }
146
147	    /* Continue iteration with copy->next */
148	    inst = copy;
149	 }
150      }
151
152      /* Kill all AEB entries that use the destination. */
153      foreach_list_safe(entry_node, aeb) {
154	 aeb_entry *entry = (aeb_entry *)entry_node;
155
156	 for (int i = 0; i < 3; i++) {
157            if (inst->overwrites_reg(entry->generator->src[i])) {
158	       entry->remove();
159	       ralloc_free(entry);
160	       break;
161	    }
162	 }
163      }
164   }
165
166   ralloc_free(mem_ctx);
167
168   if (progress)
169      this->live_intervals_valid = false;
170
171   return progress;
172}
173
174bool
175fs_visitor::opt_cse()
176{
177   bool progress = false;
178
179   fs_cfg cfg(this);
180
181   for (int b = 0; b < cfg.num_blocks; b++) {
182      fs_bblock *block = cfg.blocks[b];
183      exec_list aeb;
184
185      progress = opt_cse_local(block, &aeb) || progress;
186   }
187
188   return progress;
189}
190