i915_fpc_emit.c revision 2bc5e0e97ba7b6c32f6ff90cb90448173d74b89b
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "i915_reg.h" 29#include "i915_context.h" 30#include "i915_fpc.h" 31#include "util/u_math.h" 32 33 34#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 35#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 36#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 37#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) 38#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) 39#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) 40#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) 41#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) 42 43/* These are special, and don't have swizzle/negate bits. 44 */ 45#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) 46#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ 47 (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) 48 49 50/* Macros for translating UREG's into the various register fields used 51 * by the I915 programmable unit. 52 */ 53#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) 54#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) 55#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) 56#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) 57#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) 58#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) 59 60#define UREG_MASK 0xffffff00 61#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ 62 (REG_NR_MASK << UREG_NR_SHIFT)) 63 64 65uint 66i915_get_temp(struct i915_fp_compile *p) 67{ 68 int bit = ffs(~p->temp_flag); 69 if (!bit) { 70 i915_program_error(p, "i915_get_temp: out of temporaries"); 71 return 0; 72 } 73 74 p->temp_flag |= 1 << (bit - 1); 75 return bit - 1; 76} 77 78 79static void 80i915_release_temp(struct i915_fp_compile *p, int reg) 81{ 82 p->temp_flag &= ~(1 << reg); 83} 84 85 86/** 87 * Get unpreserved temporary, a temp whose value is not preserved between 88 * PS program phases. 89 */ 90uint 91i915_get_utemp(struct i915_fp_compile * p) 92{ 93 int bit = ffs(~p->utemp_flag); 94 if (!bit) { 95 i915_program_error(p, "i915_get_utemp: out of temporaries"); 96 return 0; 97 } 98 99 p->utemp_flag |= 1 << (bit - 1); 100 return UREG(REG_TYPE_U, (bit - 1)); 101} 102 103void 104i915_release_utemps(struct i915_fp_compile *p) 105{ 106 p->utemp_flag = ~0x7; 107} 108 109 110uint 111i915_emit_decl(struct i915_fp_compile *p, 112 uint type, uint nr, uint d0_flags) 113{ 114 uint reg = UREG(type, nr); 115 116 if (type == REG_TYPE_T) { 117 if (p->decl_t & (1 << nr)) 118 return reg; 119 120 p->decl_t |= (1 << nr); 121 } 122 else if (type == REG_TYPE_S) { 123 if (p->decl_s & (1 << nr)) 124 return reg; 125 126 p->decl_s |= (1 << nr); 127 } 128 else 129 return reg; 130 131 if (p->decl< p->declarations + I915_PROGRAM_SIZE) { 132 *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); 133 *(p->decl++) = D1_MBZ; 134 *(p->decl++) = D2_MBZ; 135 } 136 else 137 i915_program_error(p, "Out of declarations"); 138 139 p->nr_decl_insn++; 140 return reg; 141} 142 143uint 144i915_emit_arith(struct i915_fp_compile * p, 145 uint op, 146 uint dest, 147 uint mask, 148 uint saturate, uint src0, uint src1, uint src2) 149{ 150 uint c[3]; 151 uint nr_const = 0; 152 153 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 154 dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); 155 assert(dest); 156 157 if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) 158 c[nr_const++] = 0; 159 if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) 160 c[nr_const++] = 1; 161 if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) 162 c[nr_const++] = 2; 163 164 /* Recursively call this function to MOV additional const values 165 * into temporary registers. Use utemp registers for this - 166 * currently shouldn't be possible to run out, but keep an eye on 167 * this. 168 */ 169 if (nr_const > 1) { 170 uint s[3], first, i, old_utemp_flag; 171 172 s[0] = src0; 173 s[1] = src1; 174 s[2] = src2; 175 old_utemp_flag = p->utemp_flag; 176 177 first = GET_UREG_NR(s[c[0]]); 178 for (i = 1; i < nr_const; i++) { 179 if (GET_UREG_NR(s[c[i]]) != first) { 180 uint tmp = i915_get_utemp(p); 181 182 i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, 183 s[c[i]], 0, 0); 184 s[c[i]] = tmp; 185 } 186 } 187 188 src0 = s[0]; 189 src1 = s[1]; 190 src2 = s[2]; 191 p->utemp_flag = old_utemp_flag; /* restore */ 192 } 193 194 if (p->csr< p->program + I915_PROGRAM_SIZE) { 195 *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); 196 *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); 197 *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); 198 } 199 else 200 i915_program_error(p, "Out of instructions"); 201 202 if (GET_UREG_TYPE(dest) == REG_TYPE_R) 203 p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; 204 205 p->nr_alu_insn++; 206 return dest; 207} 208 209 210/** 211 * Emit a texture load or texkill instruction. 212 * \param dest the dest i915 register 213 * \param destmask the dest register writemask 214 * \param sampler the i915 sampler register 215 * \param coord the i915 source texcoord operand 216 * \param opcode the instruction opcode 217 */ 218uint i915_emit_texld( struct i915_fp_compile *p, 219 uint dest, 220 uint destmask, 221 uint sampler, 222 uint coord, 223 uint opcode ) 224{ 225 const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); 226 int temp = -1; 227 228 if (coord != k) { 229 /* texcoord is swizzled or negated. Need to allocate a new temporary 230 * register (a utemp / unpreserved temp) won't do. 231 */ 232 uint tempReg; 233 234 temp = i915_get_temp(p); /* get temp reg index */ 235 tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ 236 237 i915_emit_arith( p, A0_MOV, 238 tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ 239 0, /* saturate */ 240 coord, 0, 0 ); /* src0, src1, src2 */ 241 242 /* new src texcoord is tempReg */ 243 coord = tempReg; 244 } 245 246 /* Don't worry about saturate as we only support 247 */ 248 if (destmask != A0_DEST_CHANNEL_ALL) { 249 /* if not writing to XYZW... */ 250 uint tmp = i915_get_utemp(p); 251 i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); 252 i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); 253 /* XXX release utemp here? */ 254 } 255 else { 256 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 257 assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); 258 259 /* Output register being oC or oD defines a phase boundary */ 260 if (GET_UREG_TYPE(dest) == REG_TYPE_OC || 261 GET_UREG_TYPE(dest) == REG_TYPE_OD) 262 p->nr_tex_indirect++; 263 264 /* Reading from an r# register whose contents depend on output of the 265 * current phase defines a phase boundary. 266 */ 267 if (GET_UREG_TYPE(coord) == REG_TYPE_R && 268 p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) 269 p->nr_tex_indirect++; 270 271 if (p->csr< p->program + I915_PROGRAM_SIZE) { 272 *(p->csr++) = (opcode | 273 T0_DEST( dest ) | 274 T0_SAMPLER( sampler )); 275 276 *(p->csr++) = T1_ADDRESS_REG( coord ); 277 *(p->csr++) = T2_MBZ; 278 } 279 else 280 i915_program_error(p, "Out of instructions"); 281 282 if (GET_UREG_TYPE(dest) == REG_TYPE_R) 283 p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; 284 285 p->nr_tex_insn++; 286 } 287 288 if (temp >= 0) 289 i915_release_temp(p, temp); 290 291 return dest; 292} 293 294 295uint 296i915_emit_const1f(struct i915_fp_compile * p, float c0) 297{ 298 struct i915_fragment_shader *ifs = p->shader; 299 unsigned reg, idx; 300 301 if (c0 == 0.0) 302 return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); 303 if (c0 == 1.0) 304 return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); 305 306 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 307 if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) 308 continue; 309 for (idx = 0; idx < 4; idx++) { 310 if (!(ifs->constant_flags[reg] & (1 << idx)) || 311 ifs->constants[reg][idx] == c0) { 312 ifs->constants[reg][idx] = c0; 313 ifs->constant_flags[reg] |= 1 << idx; 314 if (reg + 1 > ifs->num_constants) 315 ifs->num_constants = reg + 1; 316 return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); 317 } 318 } 319 } 320 321 i915_program_error(p, "i915_emit_const1f: out of constants"); 322 return 0; 323} 324 325uint 326i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) 327{ 328 struct i915_fragment_shader *ifs = p->shader; 329 unsigned reg, idx; 330 331 if (c0 == 0.0) 332 return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); 333 if (c0 == 1.0) 334 return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); 335 336 if (c1 == 0.0) 337 return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); 338 if (c1 == 1.0) 339 return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); 340 341 // XXX emit swizzle here for 0, 1, -1 and any combination thereof 342 // we can use swizzle + neg for that 343 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 344 if (ifs->constant_flags[reg] == 0xf || 345 ifs->constant_flags[reg] == I915_CONSTFLAG_USER) 346 continue; 347 for (idx = 0; idx < 3; idx++) { 348 if (!(ifs->constant_flags[reg] & (3 << idx))) { 349 ifs->constants[reg][idx + 0] = c0; 350 ifs->constants[reg][idx + 1] = c1; 351 ifs->constant_flags[reg] |= 3 << idx; 352 if (reg + 1 > ifs->num_constants) 353 ifs->num_constants = reg + 1; 354 return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); 355 } 356 } 357 } 358 359 i915_program_error(p, "i915_emit_const2f: out of constants"); 360 return 0; 361} 362 363uint 364i915_emit_const4f(struct i915_fp_compile * p, 365 float c0, float c1, float c2, float c3) 366{ 367 struct i915_fragment_shader *ifs = p->shader; 368 unsigned reg; 369 370 // XXX emit swizzle here for 0, 1, -1 and any combination thereof 371 // we can use swizzle + neg for that 372 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 373 if (ifs->constant_flags[reg] == 0xf && 374 ifs->constants[reg][0] == c0 && 375 ifs->constants[reg][1] == c1 && 376 ifs->constants[reg][2] == c2 && 377 ifs->constants[reg][3] == c3) { 378 return UREG(REG_TYPE_CONST, reg); 379 } 380 else if (ifs->constant_flags[reg] == 0) { 381 382 ifs->constants[reg][0] = c0; 383 ifs->constants[reg][1] = c1; 384 ifs->constants[reg][2] = c2; 385 ifs->constants[reg][3] = c3; 386 ifs->constant_flags[reg] = 0xf; 387 if (reg + 1 > ifs->num_constants) 388 ifs->num_constants = reg + 1; 389 return UREG(REG_TYPE_CONST, reg); 390 } 391 } 392 393 i915_program_error(p, "i915_emit_const4f: out of constants"); 394 return 0; 395} 396 397 398uint 399i915_emit_const4fv(struct i915_fp_compile * p, const float * c) 400{ 401 return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); 402} 403