1/************************************************************************** 2 * 3 * Copyright 2011 The Chromium OS authors. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "i915_reg.h" 29#include "i915_context.h" 30#include "i915_fpc.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "util/u_math.h" 34#include "util/u_memory.h" 35#include "util/u_string.h" 36#include "tgsi/tgsi_parse.h" 37#include "tgsi/tgsi_dump.h" 38 39static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1) 40{ 41 return (s1->Register.File == d1->Register.File && 42 s1->Register.Indirect == d1->Register.Indirect && 43 s1->Register.Dimension == d1->Register.Dimension && 44 s1->Register.Index == d1->Register.Index); 45} 46 47static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) 48{ 49 return (d1->Register.File == d2->Register.File && 50 d1->Register.Indirect == d2->Register.Indirect && 51 d1->Register.Dimension == d2->Register.Dimension && 52 d1->Register.Index == d2->Register.Index); 53} 54 55static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) 56{ 57 return (d1->Register.File == d2->Register.File && 58 d1->Register.Indirect == d2->Register.Indirect && 59 d1->Register.Dimension == d2->Register.Dimension && 60 d1->Register.Index == d2->Register.Index && 61 d1->Register.Absolute == d2->Register.Absolute && 62 d1->Register.Negate == d2->Register.Negate); 63} 64 65static boolean has_destination(unsigned opcode) 66{ 67 return (opcode != TGSI_OPCODE_NOP && 68 opcode != TGSI_OPCODE_KIL && 69 opcode != TGSI_OPCODE_KILP && 70 opcode != TGSI_OPCODE_END && 71 opcode != TGSI_OPCODE_RET); 72} 73 74static boolean is_unswizzled(struct i915_full_src_register* r, 75 unsigned write_mask) 76{ 77 if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) 78 return FALSE; 79 if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) 80 return FALSE; 81 if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) 82 return FALSE; 83 if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) 84 return FALSE; 85 return TRUE; 86} 87 88static boolean op_commutes(unsigned opcode) 89{ 90 switch(opcode) 91 { 92 case TGSI_OPCODE_ADD: 93 case TGSI_OPCODE_MUL: 94 case TGSI_OPCODE_DP2: 95 case TGSI_OPCODE_DP3: 96 case TGSI_OPCODE_DP4: 97 return TRUE; 98 } 99 return FALSE; 100} 101 102static unsigned op_neutral_element(unsigned opcode) 103{ 104 switch(opcode) 105 { 106 case TGSI_OPCODE_ADD: 107 return TGSI_SWIZZLE_ZERO; 108 case TGSI_OPCODE_MUL: 109 case TGSI_OPCODE_DP2: 110 case TGSI_OPCODE_DP3: 111 case TGSI_OPCODE_DP4: 112 return TGSI_SWIZZLE_ONE; 113 } 114 115 debug_printf("Unknown opcode %d\n",opcode); 116 return TGSI_SWIZZLE_ZERO; 117} 118 119/* 120 * Sets the swizzle to the neutral element for the operation for the bits 121 * of writemask which are set, swizzle to identity otherwise. 122 */ 123static void set_neutral_element_swizzle(struct i915_full_src_register* r, 124 unsigned write_mask, 125 unsigned neutral) 126{ 127 if ( write_mask & TGSI_WRITEMASK_X ) 128 r->Register.SwizzleX = neutral; 129 else 130 r->Register.SwizzleX = TGSI_SWIZZLE_X; 131 132 if ( write_mask & TGSI_WRITEMASK_Y ) 133 r->Register.SwizzleY = neutral; 134 else 135 r->Register.SwizzleY = TGSI_SWIZZLE_Y; 136 137 if ( write_mask & TGSI_WRITEMASK_Z ) 138 r->Register.SwizzleZ = neutral; 139 else 140 r->Register.SwizzleZ = TGSI_SWIZZLE_Z; 141 142 if ( write_mask & TGSI_WRITEMASK_W ) 143 r->Register.SwizzleW = neutral; 144 else 145 r->Register.SwizzleW = TGSI_SWIZZLE_W; 146} 147 148static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) 149{ 150 o->File = i->File; 151 o->Indirect = i->Indirect; 152 o->Dimension = i->Dimension; 153 o->Index = i->Index; 154 o->SwizzleX = i->SwizzleX; 155 o->SwizzleY = i->SwizzleY; 156 o->SwizzleZ = i->SwizzleZ; 157 o->SwizzleW = i->SwizzleW; 158 o->Absolute = i->Absolute; 159 o->Negate = i->Negate; 160} 161 162static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) 163{ 164 o->File = i->File; 165 o->WriteMask = i->WriteMask; 166 o->Indirect = i->Indirect; 167 o->Dimension = i->Dimension; 168 o->Index = i->Index; 169} 170 171static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) 172{ 173 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); 174 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); 175 176 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); 177 178 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); 179 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); 180 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); 181} 182 183static void copy_token(union i915_full_token* o, union tgsi_full_token* i) 184{ 185 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 186 memcpy(o, i, sizeof(*o)); 187 else 188 copy_instruction(&o->FullInstruction, &i->FullInstruction); 189 190} 191 192/* 193 * Optimize away things like: 194 * MUL OUT[0].xyz, TEMP[1], TEMP[2] 195 * MOV OUT[0].w, TEMP[2] 196 * into: 197 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] 198 * This is useful for optimizing texenv. 199 */ 200static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) 201{ 202 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 203 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 204 op_commutes(current->FullInstruction.Instruction.Opcode) && 205 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 206 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 207 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 208 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && 209 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 210 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && 211 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && 212 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) 213 { 214 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 215 216 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); 217 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 218 next->FullInstruction.Dst[0].Register.WriteMask, 219 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 220 221 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | 222 next->FullInstruction.Dst[0].Register.WriteMask; 223 return; 224 } 225 226 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 227 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 228 op_commutes(current->FullInstruction.Instruction.Opcode) && 229 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 230 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 231 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 232 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && 233 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 234 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && 235 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && 236 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) 237 { 238 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 239 240 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); 241 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 242 next->FullInstruction.Dst[0].Register.WriteMask, 243 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 244 245 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | 246 next->FullInstruction.Dst[0].Register.WriteMask; 247 return; 248 } 249} 250 251/* 252 * Optimize away things like: 253 * MOV TEMP[0].xyz TEMP[0].xyzx 254 * into: 255 * NOP 256 */ 257static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current) 258{ 259 union i915_full_token current; 260 copy_token(¤t , tgsi_current); 261 if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 262 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 263 has_destination(current.FullInstruction.Instruction.Opcode) && 264 current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && 265 current.FullInstruction.Src[0].Register.Absolute == 0 && 266 current.FullInstruction.Src[0].Register.Negate == 0 && 267 is_unswizzled(¤t.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) && 268 same_src_dst_reg(¤t.FullInstruction.Src[0], ¤t.FullInstruction.Dst[0]) ) 269 { 270 return TRUE; 271 } 272 return FALSE; 273} 274 275/* 276 * Optimize away things like: 277 * *** TEMP[0], TEMP[1], TEMP[2] 278 * MOV OUT[0] TEMP[0] 279 * into: 280 * *** OUT[0], TEMP[1], TEMP[2] 281 */ 282static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next) 283{ 284 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 285 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 286 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 287 has_destination(current->FullInstruction.Instruction.Opcode) && 288 next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && 289 next->FullInstruction.Src[0].Register.Absolute == 0 && 290 next->FullInstruction.Src[0].Register.Negate == 0 && 291 next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT && 292 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && 293 current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && 294 same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) 295 { 296 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 297 298 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0]; 299 return; 300 } 301} 302 303struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) 304{ 305 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); 306 struct tgsi_parse_context parse; 307 int i = 0; 308 309 out_tokens->NumTokens = 0; 310 311 /* Count the tokens */ 312 tgsi_parse_init( &parse, tokens ); 313 while( !tgsi_parse_end_of_tokens( &parse ) ) { 314 tgsi_parse_token( &parse ); 315 out_tokens->NumTokens++; 316 } 317 tgsi_parse_free (&parse); 318 319 /* Allocate our tokens */ 320 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); 321 322 tgsi_parse_init( &parse, tokens ); 323 while( !tgsi_parse_end_of_tokens( &parse ) ) { 324 tgsi_parse_token( &parse ); 325 326 if (i915_fpc_useless_mov(&parse.FullToken)) { 327 out_tokens->NumTokens--; 328 continue; 329 } 330 331 copy_token(&out_tokens->Tokens[i] , &parse.FullToken); 332 333 if (i > 0) { 334 i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 335 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 336 } 337 i++; 338 } 339 tgsi_parse_free (&parse); 340 341 return out_tokens; 342} 343 344void i915_optimize_free(struct i915_token_list* tokens) 345{ 346 free(tokens->Tokens); 347 free(tokens); 348} 349 350 351