i915_fpc_emit.c revision 4a796264dfc1bdba37a7204f3439a8da213109ed
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "i915_reg.h" 29#include "i915_context.h" 30#include "i915_fpc.h" 31#include "util/u_math.h" 32 33 34#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 35#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 36#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 37#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) 38#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) 39#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) 40#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) 41#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) 42 43/* These are special, and don't have swizzle/negate bits. 44 */ 45#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) 46#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ 47 (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) 48 49 50/* Macros for translating UREG's into the various register fields used 51 * by the I915 programmable unit. 52 */ 53#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) 54#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) 55#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) 56#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) 57#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) 58#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) 59 60#define UREG_MASK 0xffffff00 61#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ 62 (REG_NR_MASK << UREG_NR_SHIFT)) 63 64 65uint 66i915_get_temp(struct i915_fp_compile *p) 67{ 68 int bit = ffs(~p->temp_flag); 69 if (!bit) { 70 i915_program_error(p, "i915_get_temp: out of temporaries\n"); 71 return 0; 72 } 73 74 p->temp_flag |= 1 << (bit - 1); 75 return bit - 1; 76} 77 78 79static void 80i915_release_temp(struct i915_fp_compile *p, int reg) 81{ 82 p->temp_flag &= ~(1 << reg); 83} 84 85 86/** 87 * Get unpreserved temporary, a temp whose value is not preserved between 88 * PS program phases. 89 */ 90uint 91i915_get_utemp(struct i915_fp_compile * p) 92{ 93 int bit = ffs(~p->utemp_flag); 94 if (!bit) { 95 i915_program_error(p, "i915_get_utemp: out of temporaries\n"); 96 return 0; 97 } 98 99 p->utemp_flag |= 1 << (bit - 1); 100 return UREG(REG_TYPE_U, (bit - 1)); 101} 102 103void 104i915_release_utemps(struct i915_fp_compile *p) 105{ 106 p->utemp_flag = ~0x7; 107} 108 109 110uint 111i915_emit_decl(struct i915_fp_compile *p, 112 uint type, uint nr, uint d0_flags) 113{ 114 uint reg = UREG(type, nr); 115 116 if (type == REG_TYPE_T) { 117 if (p->decl_t & (1 << nr)) 118 return reg; 119 120 p->decl_t |= (1 << nr); 121 } 122 else if (type == REG_TYPE_S) { 123 if (p->decl_s & (1 << nr)) 124 return reg; 125 126 p->decl_s |= (1 << nr); 127 } 128 else 129 return reg; 130 131 *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); 132 *(p->decl++) = D1_MBZ; 133 *(p->decl++) = D2_MBZ; 134 135 p->nr_decl_insn++; 136 return reg; 137} 138 139uint 140i915_emit_arith(struct i915_fp_compile * p, 141 uint op, 142 uint dest, 143 uint mask, 144 uint saturate, uint src0, uint src1, uint src2) 145{ 146 uint c[3]; 147 uint nr_const = 0; 148 149 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 150 dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); 151 assert(dest); 152 153 if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) 154 c[nr_const++] = 0; 155 if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) 156 c[nr_const++] = 1; 157 if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) 158 c[nr_const++] = 2; 159 160 /* Recursively call this function to MOV additional const values 161 * into temporary registers. Use utemp registers for this - 162 * currently shouldn't be possible to run out, but keep an eye on 163 * this. 164 */ 165 if (nr_const > 1) { 166 uint s[3], first, i, old_utemp_flag; 167 168 s[0] = src0; 169 s[1] = src1; 170 s[2] = src2; 171 old_utemp_flag = p->utemp_flag; 172 173 first = GET_UREG_NR(s[c[0]]); 174 for (i = 1; i < nr_const; i++) { 175 if (GET_UREG_NR(s[c[i]]) != first) { 176 uint tmp = i915_get_utemp(p); 177 178 i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, 179 s[c[i]], 0, 0); 180 s[c[i]] = tmp; 181 } 182 } 183 184 src0 = s[0]; 185 src1 = s[1]; 186 src2 = s[2]; 187 p->utemp_flag = old_utemp_flag; /* restore */ 188 } 189 190 *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); 191 *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); 192 *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); 193 194 p->nr_alu_insn++; 195 return dest; 196} 197 198 199/** 200 * Emit a texture load or texkill instruction. 201 * \param dest the dest i915 register 202 * \param destmask the dest register writemask 203 * \param sampler the i915 sampler register 204 * \param coord the i915 source texcoord operand 205 * \param opcode the instruction opcode 206 */ 207uint i915_emit_texld( struct i915_fp_compile *p, 208 uint dest, 209 uint destmask, 210 uint sampler, 211 uint coord, 212 uint opcode ) 213{ 214 const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); 215 int temp = -1; 216 217 if (coord != k) { 218 /* texcoord is swizzled or negated. Need to allocate a new temporary 219 * register (a utemp / unpreserved temp) won't do. 220 */ 221 uint tempReg; 222 223 temp = i915_get_temp(p); /* get temp reg index */ 224 tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ 225 226 i915_emit_arith( p, A0_MOV, 227 tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ 228 0, /* saturate */ 229 coord, 0, 0 ); /* src0, src1, src2 */ 230 231 /* new src texcoord is tempReg */ 232 coord = tempReg; 233 } 234 235 /* Don't worry about saturate as we only support 236 */ 237 if (destmask != A0_DEST_CHANNEL_ALL) { 238 /* if not writing to XYZW... */ 239 uint tmp = i915_get_utemp(p); 240 i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); 241 i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); 242 /* XXX release utemp here? */ 243 } 244 else { 245 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 246 assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); 247 248 /* is the sampler coord a texcoord input reg? */ 249 if (GET_UREG_TYPE(coord) != REG_TYPE_T) { 250 p->nr_tex_indirect++; 251 } 252 253 *(p->csr++) = (opcode | 254 T0_DEST( dest ) | 255 T0_SAMPLER( sampler )); 256 257 *(p->csr++) = T1_ADDRESS_REG( coord ); 258 *(p->csr++) = T2_MBZ; 259 260 p->nr_tex_insn++; 261 } 262 263 if (temp >= 0) 264 i915_release_temp(p, temp); 265 266 return dest; 267} 268 269 270uint 271i915_emit_const1f(struct i915_fp_compile * p, float c0) 272{ 273 struct i915_fragment_shader *ifs = p->shader; 274 unsigned reg, idx; 275 276 if (c0 == 0.0) 277 return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); 278 if (c0 == 1.0) 279 return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); 280 281 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 282 if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) 283 continue; 284 for (idx = 0; idx < 4; idx++) { 285 if (!(ifs->constant_flags[reg] & (1 << idx)) || 286 ifs->constants[reg][idx] == c0) { 287 ifs->constants[reg][idx] = c0; 288 ifs->constant_flags[reg] |= 1 << idx; 289 if (reg + 1 > ifs->num_constants) 290 ifs->num_constants = reg + 1; 291 return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); 292 } 293 } 294 } 295 296 i915_program_error(p, "i915_emit_const1f: out of constants\n"); 297 return 0; 298} 299 300uint 301i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) 302{ 303 struct i915_fragment_shader *ifs = p->shader; 304 unsigned reg, idx; 305 306 if (c0 == 0.0) 307 return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); 308 if (c0 == 1.0) 309 return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); 310 311 if (c1 == 0.0) 312 return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); 313 if (c1 == 1.0) 314 return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); 315 316 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 317 if (ifs->constant_flags[reg] == 0xf || 318 ifs->constant_flags[reg] == I915_CONSTFLAG_USER) 319 continue; 320 for (idx = 0; idx < 3; idx++) { 321 if (!(ifs->constant_flags[reg] & (3 << idx))) { 322 ifs->constants[reg][idx + 0] = c0; 323 ifs->constants[reg][idx + 1] = c1; 324 ifs->constant_flags[reg] |= 3 << idx; 325 if (reg + 1 > ifs->num_constants) 326 ifs->num_constants = reg + 1; 327 return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); 328 } 329 } 330 } 331 332 i915_program_error(p, "i915_emit_const2f: out of constants\n"); 333 return 0; 334} 335 336 337 338uint 339i915_emit_const4f(struct i915_fp_compile * p, 340 float c0, float c1, float c2, float c3) 341{ 342 struct i915_fragment_shader *ifs = p->shader; 343 unsigned reg; 344 345 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 346 if (ifs->constant_flags[reg] == 0xf && 347 ifs->constants[reg][0] == c0 && 348 ifs->constants[reg][1] == c1 && 349 ifs->constants[reg][2] == c2 && 350 ifs->constants[reg][3] == c3) { 351 return UREG(REG_TYPE_CONST, reg); 352 } 353 else if (ifs->constant_flags[reg] == 0) { 354 355 ifs->constants[reg][0] = c0; 356 ifs->constants[reg][1] = c1; 357 ifs->constants[reg][2] = c2; 358 ifs->constants[reg][3] = c3; 359 ifs->constant_flags[reg] = 0xf; 360 if (reg + 1 > ifs->num_constants) 361 ifs->num_constants = reg + 1; 362 return UREG(REG_TYPE_CONST, reg); 363 } 364 } 365 366 i915_program_error(p, "i915_emit_const4f: out of constants\n"); 367 return 0; 368} 369 370 371uint 372i915_emit_const4fv(struct i915_fp_compile * p, const float * c) 373{ 374 return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); 375} 376