i915_fpc_optimize.c revision 053af6ac8cda226a62844fc014ed9f133557c111
1/************************************************************************** 2 * 3 * Copyright 2011 The Chromium OS authors. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "i915_reg.h" 29#include "i915_context.h" 30#include "i915_fpc.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "util/u_math.h" 34#include "util/u_memory.h" 35#include "util/u_string.h" 36#include "tgsi/tgsi_parse.h" 37#include "tgsi/tgsi_dump.h" 38 39static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) 40{ 41 return (d1->Register.File == d2->Register.File && 42 d1->Register.Indirect == d2->Register.Indirect && 43 d1->Register.Dimension == d2->Register.Dimension && 44 d1->Register.Index == d2->Register.Index); 45} 46 47static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) 48{ 49 return (d1->Register.File == d2->Register.File && 50 d1->Register.Indirect == d2->Register.Indirect && 51 d1->Register.Dimension == d2->Register.Dimension && 52 d1->Register.Index == d2->Register.Index && 53 d1->Register.Absolute == d2->Register.Absolute && 54 d1->Register.Negate == d2->Register.Negate); 55} 56 57static boolean is_unswizzled(struct i915_full_src_register* r, 58 int sx, 59 int sy, 60 int sz, 61 int sw) 62{ 63 if (sx && r->Register.SwizzleX != TGSI_SWIZZLE_X) 64 return FALSE; 65 if (sy && r->Register.SwizzleY != TGSI_SWIZZLE_Y) 66 return FALSE; 67 if (sz && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) 68 return FALSE; 69 if (sw && r->Register.SwizzleW != TGSI_SWIZZLE_W) 70 return FALSE; 71 return FALSE; 72} 73 74/* 75 * Optimize away things like: 76 * MUL OUT[0].xyz, TEMP[1], TEMP[2] 77 * MOV OUT[0].w, TEMP[2] 78 * into: 79 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] 80 * This is useful for optimizing texenv. 81 */ 82static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next) 83{ 84 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 85 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 86 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && 87 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 88 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && 89 next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && 90 same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && 91 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && 92 is_unswizzled(¤t->FullInstruction.Src[0], 1, 1, 1, 0) && 93 is_unswizzled(¤t->FullInstruction.Src[1], 1, 1, 1, 0) && 94 is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) 95 { 96 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 97 current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 98 current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_ONE; 99 current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 100 return; 101 } 102 103 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 104 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 105 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MUL && 106 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 107 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZ && 108 next->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_W && 109 same_dst_reg(&next->FullInstruction.Dst[0], &next->FullInstruction.Dst[0]) && 110 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && 111 is_unswizzled(¤t->FullInstruction.Src[0], 1, 1, 1, 0) && 112 is_unswizzled(¤t->FullInstruction.Src[1], 1, 1, 1, 0) && 113 is_unswizzled(&next->FullInstruction.Src[0], 0, 0, 0, 1) ) 114 { 115 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 116 current->FullInstruction.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 117 current->FullInstruction.Src[1].Register.SwizzleW = TGSI_SWIZZLE_ONE; 118 current->FullInstruction.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W; 119 return; 120 } 121} 122 123static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) 124{ 125 o->File = i->File; 126 o->Indirect = i->Indirect; 127 o->Dimension = i->Dimension; 128 o->Index = i->Index; 129 o->SwizzleX = i->SwizzleX; 130 o->SwizzleY = i->SwizzleY; 131 o->SwizzleZ = i->SwizzleZ; 132 o->SwizzleW = i->SwizzleW; 133 o->Absolute = i->Absolute; 134 o->Negate = i->Negate; 135} 136 137static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) 138{ 139 o->File = i->File; 140 o->WriteMask = i->WriteMask; 141 o->Indirect = i->Indirect; 142 o->Dimension = i->Dimension; 143 o->Index = i->Index; 144} 145 146static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) 147{ 148 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); 149 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); 150 151 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); 152 153 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); 154 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); 155 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); 156} 157 158static void copy_token(union i915_full_token* o, union tgsi_full_token* i) 159{ 160 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 161 memcpy(o, i, sizeof(*o)); 162 else 163 copy_instruction(&o->FullInstruction, &i->FullInstruction); 164 165} 166 167struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) 168{ 169 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); 170 out_tokens->NumTokens = 0; 171 struct tgsi_parse_context parse; 172 int i = 0; 173 174 /* Count the tokens */ 175 tgsi_parse_init( &parse, tokens ); 176 while( !tgsi_parse_end_of_tokens( &parse ) ) { 177 tgsi_parse_token( &parse ); 178 out_tokens->NumTokens++; 179 } 180 tgsi_parse_free (&parse); 181 182 /* Allocate our tokens */ 183 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); 184 185 tgsi_parse_init( &parse, tokens ); 186 while( !tgsi_parse_end_of_tokens( &parse ) ) { 187 tgsi_parse_token( &parse ); 188 copy_token(&out_tokens->Tokens[i] , &parse.FullToken); 189 190 if (i > 0) 191 i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 192 193 i++; 194 } 195 tgsi_parse_free (&parse); 196 197 return out_tokens; 198} 199 200void i915_optimize_free(struct i915_token_list* tokens) 201{ 202 free(tokens->Tokens); 203 free(tokens); 204} 205 206 207