1/**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "i915_reg.h"
29#include "i915_context.h"
30#include "i915_fpc.h"
31
32#include "pipe/p_shader_tokens.h"
33#include "util/u_math.h"
34#include "util/u_memory.h"
35#include "util/u_string.h"
36#include "tgsi/tgsi_parse.h"
37#include "tgsi/tgsi_dump.h"
38
39static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
40{
41   return (s1->Register.File == d1->Register.File &&
42           s1->Register.Indirect == d1->Register.Indirect &&
43           s1->Register.Dimension == d1->Register.Dimension &&
44           s1->Register.Index == d1->Register.Index);
45}
46
47static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
48{
49   return (d1->Register.File == d2->Register.File &&
50           d1->Register.Indirect == d2->Register.Indirect &&
51           d1->Register.Dimension == d2->Register.Dimension &&
52           d1->Register.Index == d2->Register.Index);
53}
54
55static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
56{
57   return (d1->Register.File == d2->Register.File &&
58           d1->Register.Indirect == d2->Register.Indirect &&
59           d1->Register.Dimension == d2->Register.Dimension &&
60           d1->Register.Index == d2->Register.Index &&
61           d1->Register.Absolute == d2->Register.Absolute &&
62           d1->Register.Negate == d2->Register.Negate);
63}
64
65static boolean has_destination(unsigned opcode)
66{
67   return (opcode != TGSI_OPCODE_NOP &&
68           opcode != TGSI_OPCODE_KIL &&
69           opcode != TGSI_OPCODE_KILP &&
70           opcode != TGSI_OPCODE_END &&
71           opcode != TGSI_OPCODE_RET);
72}
73
74static boolean is_unswizzled(struct i915_full_src_register* r,
75                             unsigned write_mask)
76{
77   if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
78      return FALSE;
79   if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
80      return FALSE;
81   if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
82      return FALSE;
83   if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
84      return FALSE;
85   return TRUE;
86}
87
88static boolean op_commutes(unsigned opcode)
89{
90   switch(opcode)
91   {
92      case TGSI_OPCODE_ADD:
93      case TGSI_OPCODE_MUL:
94      case TGSI_OPCODE_DP2:
95      case TGSI_OPCODE_DP3:
96      case TGSI_OPCODE_DP4:
97         return TRUE;
98   }
99   return FALSE;
100}
101
102static unsigned op_neutral_element(unsigned opcode)
103{
104   switch(opcode)
105   {
106      case TGSI_OPCODE_ADD:
107         return TGSI_SWIZZLE_ZERO;
108      case TGSI_OPCODE_MUL:
109      case TGSI_OPCODE_DP2:
110      case TGSI_OPCODE_DP3:
111      case TGSI_OPCODE_DP4:
112         return TGSI_SWIZZLE_ONE;
113   }
114
115   debug_printf("Unknown opcode %d\n",opcode);
116   return TGSI_SWIZZLE_ZERO;
117}
118
119/*
120 * Sets the swizzle to the neutral element for the operation for the bits
121 * of writemask which are set, swizzle to identity otherwise.
122 */
123static void set_neutral_element_swizzle(struct i915_full_src_register* r,
124                                        unsigned write_mask,
125                                        unsigned neutral)
126{
127   if ( write_mask & TGSI_WRITEMASK_X )
128      r->Register.SwizzleX = neutral;
129   else
130      r->Register.SwizzleX = TGSI_SWIZZLE_X;
131
132   if ( write_mask & TGSI_WRITEMASK_Y )
133      r->Register.SwizzleY = neutral;
134   else
135      r->Register.SwizzleY = TGSI_SWIZZLE_Y;
136
137   if ( write_mask & TGSI_WRITEMASK_Z )
138      r->Register.SwizzleZ = neutral;
139   else
140      r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
141
142   if ( write_mask & TGSI_WRITEMASK_W )
143      r->Register.SwizzleW = neutral;
144   else
145      r->Register.SwizzleW = TGSI_SWIZZLE_W;
146}
147
148static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
149{
150   o->File      = i->File;
151   o->Indirect  = i->Indirect;
152   o->Dimension = i->Dimension;
153   o->Index     = i->Index;
154   o->SwizzleX  = i->SwizzleX;
155   o->SwizzleY  = i->SwizzleY;
156   o->SwizzleZ  = i->SwizzleZ;
157   o->SwizzleW  = i->SwizzleW;
158   o->Absolute  = i->Absolute;
159   o->Negate    = i->Negate;
160}
161
162static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
163{
164   o->File      = i->File;
165   o->WriteMask = i->WriteMask;
166   o->Indirect  = i->Indirect;
167   o->Dimension = i->Dimension;
168   o->Index     = i->Index;
169}
170
171static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
172{
173   memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
174   memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
175
176   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
177
178   copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
179   copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
180   copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
181}
182
183static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
184{
185   if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
186      memcpy(o, i, sizeof(*o));
187   else
188      copy_instruction(&o->FullInstruction, &i->FullInstruction);
189
190}
191
192/*
193 * Optimize away things like:
194 *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
195 *    MOV OUT[0].w, TEMP[2]
196 * into:
197 *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
198 * This is useful for optimizing texenv.
199 */
200static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
201{
202   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
203        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
204        op_commutes(current->FullInstruction.Instruction.Opcode) &&
205        current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
206        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
207        same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
208        same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
209        !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
210        is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
211        is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
212        is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
213   {
214      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
215
216      set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
217      set_neutral_element_swizzle(&current->FullInstruction.Src[0],
218                                  next->FullInstruction.Dst[0].Register.WriteMask,
219                                  op_neutral_element(current->FullInstruction.Instruction.Opcode));
220
221      current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
222                                                           next->FullInstruction.Dst[0].Register.WriteMask;
223      return;
224   }
225
226   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
227        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
228        op_commutes(current->FullInstruction.Instruction.Opcode) &&
229        current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
230        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
231        same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
232        same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
233        !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
234        is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
235        is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
236        is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
237   {
238      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
239
240      set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
241      set_neutral_element_swizzle(&current->FullInstruction.Src[1],
242                                  next->FullInstruction.Dst[0].Register.WriteMask,
243                                  op_neutral_element(current->FullInstruction.Instruction.Opcode));
244
245      current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
246                                                           next->FullInstruction.Dst[0].Register.WriteMask;
247      return;
248   }
249}
250
251/*
252 * Optimize away things like:
253 *    MOV TEMP[0].xyz TEMP[0].xyzx
254 * into:
255 *    NOP
256 */
257static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
258{
259   union i915_full_token current;
260   copy_token(&current , tgsi_current);
261   if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
262        current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
263        has_destination(current.FullInstruction.Instruction.Opcode) &&
264        current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
265        current.FullInstruction.Src[0].Register.Absolute == 0 &&
266        current.FullInstruction.Src[0].Register.Negate == 0 &&
267        is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
268        same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
269   {
270      return TRUE;
271   }
272   return FALSE;
273}
274
275/*
276 * Optimize away things like:
277 *    *** TEMP[0], TEMP[1], TEMP[2]
278 *    MOV OUT[0] TEMP[0]
279 * into:
280 *    *** OUT[0], TEMP[1], TEMP[2]
281 */
282static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
283{
284   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
285        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
286        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
287        has_destination(current->FullInstruction.Instruction.Opcode) &&
288        next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
289        next->FullInstruction.Src[0].Register.Absolute == 0 &&
290        next->FullInstruction.Src[0].Register.Negate == 0 &&
291        next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT &&
292        is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
293        current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
294        same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
295   {
296      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
297
298      current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
299      return;
300   }
301}
302
303struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
304{
305   struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
306   struct tgsi_parse_context parse;
307   int i = 0;
308
309   out_tokens->NumTokens = 0;
310
311   /* Count the tokens */
312   tgsi_parse_init( &parse, tokens );
313   while( !tgsi_parse_end_of_tokens( &parse ) ) {
314      tgsi_parse_token( &parse );
315      out_tokens->NumTokens++;
316   }
317   tgsi_parse_free (&parse);
318
319   /* Allocate our tokens */
320   out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
321
322   tgsi_parse_init( &parse, tokens );
323   while( !tgsi_parse_end_of_tokens( &parse ) ) {
324      tgsi_parse_token( &parse );
325
326      if (i915_fpc_useless_mov(&parse.FullToken)) {
327         out_tokens->NumTokens--;
328         continue;
329      }
330
331      copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
332
333      if (i > 0) {
334         i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
335         i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
336      }
337      i++;
338   }
339   tgsi_parse_free (&parse);
340
341   return out_tokens;
342}
343
344void i915_optimize_free(struct i915_token_list* tokens)
345{
346   free(tokens->Tokens);
347   free(tokens);
348}
349
350
351