i915_fpc_optimize.c revision 053af6ac8cda226a62844fc014ed9f133557c111
1/**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "i915_reg.h"
29#include "i915_context.h"
30#include "i915_fpc.h"
31
32#include "pipe/p_shader_tokens.h"
33#include "util/u_math.h"
34#include "util/u_memory.h"
35#include "util/u_string.h"
36#include "tgsi/tgsi_parse.h"
37#include "tgsi/tgsi_dump.h"
38
39static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
40{
41   return (d1->Register.File == d2->Register.File &&
42           d1->Register.Indirect == d2->Register.Indirect &&
43           d1->Register.Dimension == d2->Register.Dimension &&
44           d1->Register.Index == d2->Register.Index);
45}
46
47static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
48{
49   return (d1->Register.File == d2->Register.File &&
50           d1->Register.Indirect == d2->Register.Indirect &&
51           d1->Register.Dimension == d2->Register.Dimension &&
52           d1->Register.Index == d2->Register.Index &&
53           d1->Register.Absolute == d2->Register.Absolute &&
54           d1->Register.Negate == d2->Register.Negate);
55}
56
57static boolean is_unswizzled(struct i915_full_src_register* r,
58                             int sx,
59                             int sy,
60                             int sz,
61                             int sw)
62{
63   if (sx && r->Register.SwizzleX != TGSI_SWIZZLE_X)
64      return FALSE;
65   if (sy && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
66      return FALSE;
67   if (sz && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
68      return FALSE;
69   if (sw && r->Register.SwizzleW != TGSI_SWIZZLE_W)
70      return FALSE;
71   return FALSE;
72}
73
74/*
75 * Optimize away things like:
76 *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
77 *    MOV OUT[0].w, TEMP[2]
78 * into:
79 *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
80 * This is useful for optimizing texenv.
81 */
82static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next)
83{
84   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
85        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
86        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
87        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
88        current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
89        next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
90        same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
91        same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
92        is_unswizzled(&current->FullInstruction.Src[0], 1, 1, 1, 0) &&
93        is_unswizzled(&current->FullInstruction.Src[1], 1, 1, 1, 0) &&
94        is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) )
95   {
96      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
97      current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
98      current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE;
99      current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
100      return;
101   }
102
103   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
104        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
105        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL &&
106        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
107        current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ &&
108        next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W &&
109        same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) &&
110        same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
111        is_unswizzled(&current->FullInstruction.Src[0], 1, 1, 1, 0) &&
112        is_unswizzled(&current->FullInstruction.Src[1], 1, 1, 1, 0) &&
113        is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) )
114   {
115      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
116      current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
117      current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE;
118      current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W;
119      return;
120   }
121}
122
123static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
124{
125   o->File      = i->File;
126   o->Indirect  = i->Indirect;
127   o->Dimension = i->Dimension;
128   o->Index     = i->Index;
129   o->SwizzleX  = i->SwizzleX;
130   o->SwizzleY  = i->SwizzleY;
131   o->SwizzleZ  = i->SwizzleZ;
132   o->SwizzleW  = i->SwizzleW;
133   o->Absolute  = i->Absolute;
134   o->Negate    = i->Negate;
135}
136
137static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
138{
139   o->File      = i->File;
140   o->WriteMask = i->WriteMask;
141   o->Indirect  = i->Indirect;
142   o->Dimension = i->Dimension;
143   o->Index     = i->Index;
144}
145
146static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
147{
148   memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
149   memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
150
151   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
152
153   copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
154   copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
155   copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
156}
157
158static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
159{
160   if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
161      memcpy(o, i, sizeof(*o));
162   else
163      copy_instruction(&o->FullInstruction, &i->FullInstruction);
164
165}
166
167struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
168{
169   struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
170   out_tokens->NumTokens = 0;
171   struct tgsi_parse_context parse;
172   int i = 0;
173
174   /* Count the tokens */
175   tgsi_parse_init( &parse, tokens );
176   while( !tgsi_parse_end_of_tokens( &parse ) ) {
177      tgsi_parse_token( &parse );
178      out_tokens->NumTokens++;
179   }
180   tgsi_parse_free (&parse);
181
182   /* Allocate our tokens */
183   out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
184
185   tgsi_parse_init( &parse, tokens );
186   while( !tgsi_parse_end_of_tokens( &parse ) ) {
187      tgsi_parse_token( &parse );
188      copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
189
190      if (i > 0)
191         i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
192
193      i++;
194   }
195   tgsi_parse_free (&parse);
196
197   return out_tokens;
198}
199
200void i915_optimize_free(struct i915_token_list* tokens)
201{
202   free(tokens->Tokens);
203   free(tokens);
204}
205
206
207