i915_program.c revision ab81d1fd999b1696df4c733a86b651e4c38b9bcc
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include <strings.h> 29 30#include "glheader.h" 31#include "macros.h" 32#include "enums.h" 33 34#include "tnl/t_context.h" 35#include "intel_batchbuffer.h" 36 37#include "i915_reg.h" 38#include "i915_context.h" 39#include "i915_program.h" 40 41 42#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 43#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 44#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) 45#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) 46#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) 47#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) 48#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) 49#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) 50 51/* These are special, and don't have swizzle/negate bits. 52 */ 53#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) 54#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ 55 (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) 56 57 58/* Macros for translating UREG's into the various register fields used 59 * by the I915 programmable unit. 60 */ 61#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) 62#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) 63#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) 64#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) 65#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) 66#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) 67 68#define UREG_MASK 0xffffff00 69#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ 70 (REG_NR_MASK << UREG_NR_SHIFT)) 71 72 73#define I915_CONSTFLAG_PARAM 0x1f 74 75GLuint i915_get_temp( struct i915_fragment_program *p ) 76{ 77 int bit = ffs( ~p->temp_flag ); 78 if (!bit) { 79 fprintf(stderr, "%s: out of temporaries\n", __FILE__); 80 exit(1); 81 } 82 83 p->temp_flag |= 1<<(bit-1); 84 return UREG(REG_TYPE_R, (bit-1)); 85} 86 87 88GLuint i915_get_utemp( struct i915_fragment_program *p ) 89{ 90 int bit = ffs( ~p->utemp_flag ); 91 if (!bit) { 92 fprintf(stderr, "%s: out of temporaries\n", __FILE__); 93 exit(1); 94 } 95 96 p->utemp_flag |= 1<<(bit-1); 97 return UREG(REG_TYPE_U, (bit-1)); 98} 99 100void i915_release_utemps( struct i915_fragment_program *p ) 101{ 102 p->utemp_flag = ~0x7; 103} 104 105 106GLuint i915_emit_decl( struct i915_fragment_program *p, 107 GLuint type, GLuint nr, GLuint d0_flags ) 108{ 109 GLuint reg = UREG(type, nr); 110 111 if (type == REG_TYPE_T) { 112 if (p->decl_t & (1<<nr)) 113 return reg; 114 115 p->decl_t |= (1<<nr); 116 } 117 else if (type == REG_TYPE_S) { 118 if (p->decl_s & (1<<nr)) 119 return reg; 120 121 p->decl_s |= (1<<nr); 122 } 123 else 124 return reg; 125 126 *(p->decl++) = (D0_DCL | D0_DEST( reg ) | d0_flags); 127 *(p->decl++) = D1_MBZ; 128 *(p->decl++) = D2_MBZ; 129 130 p->nr_decl_insn++; 131 return reg; 132} 133 134GLuint i915_emit_arith( struct i915_fragment_program *p, 135 GLuint op, 136 GLuint dest, 137 GLuint mask, 138 GLuint saturate, 139 GLuint src0, 140 GLuint src1, 141 GLuint src2 ) 142{ 143 GLuint c[3]; 144 GLuint nr_const = 0; 145 146 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 147 assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); 148 149 if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) c[nr_const++] = 0; 150 if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) c[nr_const++] = 1; 151 if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) c[nr_const++] = 2; 152 153 /* Recursively call this function to MOV additional const values 154 * into temporary registers. Use utemp registers for this - 155 * currently shouldn't be possible to run out, but keep an eye on 156 * this. 157 */ 158 if (nr_const > 1) { 159 GLuint s[3], first, i, old_utemp_flag; 160 161 s[0] = src0; 162 s[1] = src1; 163 s[2] = src2; 164 old_utemp_flag = p->utemp_flag; 165 166 first = GET_UREG_NR(s[c[0]]); 167 for (i = 1 ; i < nr_const ; i++) { 168 if (GET_UREG_NR(s[c[i]]) != first) { 169 GLuint tmp = i915_get_utemp(p); 170 171 i915_emit_arith( p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, 172 s[c[i]], 0, 0 ); 173 s[c[i]] = tmp; 174 } 175 } 176 177 src0 = s[0]; 178 src1 = s[1]; 179 src2 = s[2]; 180 p->utemp_flag = old_utemp_flag; /* restore */ 181 } 182 183 *(p->csr++) = (op | 184 A0_DEST( dest ) | 185 mask | 186 saturate | 187 A0_SRC0( src0 )); 188 *(p->csr++) = (A1_SRC0( src0 ) | 189 A1_SRC1( src1 )); 190 *(p->csr++) = (A2_SRC1( src1 ) | 191 A2_SRC2( src2 )); 192 193 p->nr_alu_insn++; 194 return dest; 195} 196 197GLuint i915_emit_texld( struct i915_fragment_program *p, 198 GLuint dest, 199 GLuint destmask, 200 GLuint sampler, 201 GLuint coord, 202 GLuint op ) 203{ 204 assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); 205 assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); 206 207 if (GET_UREG_TYPE(coord) != REG_TYPE_T) { 208 p->nr_tex_indirect++; 209 } 210 211 *(p->csr++) = (op | 212 T0_DEST( dest ) | 213 destmask | 214 T0_SAMPLER( sampler )); 215 216 *(p->csr++) = T1_ADDRESS_REG( coord ); 217 *(p->csr++) = T2_MBZ; 218 219 p->nr_tex_insn++; 220 return dest; 221} 222 223 224GLuint i915_emit_const1f( struct i915_fragment_program *p, GLfloat c0 ) 225{ 226 GLint reg, idx; 227 228 if (c0 == 0.0) return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); 229 if (c0 == 1.0) return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE ); 230 231 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 232 if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) 233 continue; 234 for (idx = 0; idx < 4; idx++) { 235 if (!(p->constant_flags[reg] & (1<<idx)) || 236 p->constant[reg][idx] == c0) { 237 p->constant[reg][idx] = c0; 238 p->constant_flags[reg] |= 1<<idx; 239 if (reg+1 > p->nr_constants) p->nr_constants = reg+1; 240 return swizzle(UREG(REG_TYPE_CONST, reg),idx,ZERO,ZERO,ONE); 241 } 242 } 243 } 244 245 fprintf(stderr, "%s: out of constants\n", __FUNCTION__); 246 p->error = 1; 247 return 0; 248} 249 250GLuint i915_emit_const2f( struct i915_fragment_program *p, 251 GLfloat c0, GLfloat c1 ) 252{ 253 GLint reg, idx; 254 255 if (c0 == 0.0) return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); 256 if (c0 == 1.0) return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); 257 258 if (c1 == 0.0) return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); 259 if (c1 == 1.0) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); 260 261 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 262 if (p->constant_flags[reg] == 0xf || 263 p->constant_flags[reg] == I915_CONSTFLAG_PARAM) 264 continue; 265 for (idx = 0; idx < 3; idx++) { 266 if (!(p->constant_flags[reg] & (3<<idx))) { 267 p->constant[reg][idx] = c0; 268 p->constant[reg][idx+1] = c1; 269 p->constant_flags[reg] |= 3<<idx; 270 if (reg+1 > p->nr_constants) p->nr_constants = reg+1; 271 return swizzle(UREG(REG_TYPE_CONST, reg),idx,idx+1,ZERO,ONE); 272 } 273 } 274 } 275 276 fprintf(stderr, "%s: out of constants\n", __FUNCTION__); 277 p->error = 1; 278 return 0; 279} 280 281 282 283GLuint i915_emit_const4f( struct i915_fragment_program *p, 284 GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3 ) 285{ 286 GLint reg; 287 288 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 289 if (p->constant_flags[reg] == 0xf && 290 p->constant[reg][0] == c0 && 291 p->constant[reg][1] == c1 && 292 p->constant[reg][2] == c2 && 293 p->constant[reg][3] == c3) { 294 return UREG(REG_TYPE_CONST, reg); 295 } 296 else if (p->constant_flags[reg] == 0) { 297 p->constant[reg][0] = c0; 298 p->constant[reg][1] = c1; 299 p->constant[reg][2] = c2; 300 p->constant[reg][3] = c3; 301 p->constant_flags[reg] = 0xf; 302 if (reg+1 > p->nr_constants) p->nr_constants = reg+1; 303 return UREG(REG_TYPE_CONST, reg); 304 } 305 } 306 307 fprintf(stderr, "%s: out of constants\n", __FUNCTION__); 308 p->error = 1; 309 return 0; 310} 311 312 313GLuint i915_emit_const4fv( struct i915_fragment_program *p, const GLfloat *c ) 314{ 315 return i915_emit_const4f( p, c[0], c[1], c[2], c[3] ); 316} 317 318 319GLuint i915_emit_param4fv( struct i915_fragment_program *p, 320 const GLfloat *values ) 321{ 322 GLint reg, i; 323 324 for (i = 0; i < p->nr_params; i++) { 325 if (p->param[i].values == values) 326 return UREG(REG_TYPE_CONST, p->param[i].reg); 327 } 328 329 330 for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { 331 if (p->constant_flags[reg] == 0) { 332 p->constant_flags[reg] = I915_CONSTFLAG_PARAM; 333 i = p->nr_params++; 334 335 p->param[i].values = values; 336 p->param[i].reg = reg; 337 p->params_uptodate = 0; 338 339 if (reg+1 > p->nr_constants) p->nr_constants = reg+1; 340 return UREG(REG_TYPE_CONST, reg); 341 } 342 } 343 344 fprintf(stderr, "%s: out of constants\n", __FUNCTION__); 345 p->error = 1; 346 return 0; 347} 348 349 350 351 352void i915_program_error( struct i915_fragment_program *p, const GLubyte *msg ) 353{ 354 fprintf(stderr, "%s\n", msg); 355 p->error = 1; 356} 357 358void i915_init_program( i915ContextPtr i915, struct i915_fragment_program *p ) 359{ 360 GLcontext *ctx = &i915->intel.ctx; 361 TNLcontext *tnl = TNL_CONTEXT( ctx ); 362 363 p->translated = 0; 364 p->params_uptodate = 0; 365 p->on_hardware = 0; 366 p->error = 0; 367 368 p->nr_tex_indirect = 1; /* correct? */ 369 p->nr_tex_insn = 0; 370 p->nr_alu_insn = 0; 371 p->nr_decl_insn = 0; 372 373 p->ctx = ctx; 374 memset( p->constant_flags, 0, sizeof(p->constant_flags) ); 375 376 p->nr_constants = 0; 377 p->csr = p->program; 378 p->decl = p->declarations; 379 p->decl_s = 0; 380 p->decl_t = 0; 381 p->temp_flag = 0xffff000; 382 p->utemp_flag = ~0x7; 383 p->wpos_tex = -1; 384 p->depth_written = 0; 385 p->nr_params = 0; 386 387 p->src_texture = UREG_BAD; 388 p->src_previous = UREG(REG_TYPE_T, T_DIFFUSE); 389 p->last_tex_stage = 0; 390 p->VB = &tnl->vb; 391 392 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 393} 394 395 396void i915_fini_program( struct i915_fragment_program *p ) 397{ 398 GLuint program_size = p->csr - p->program; 399 GLuint decl_size = p->decl - p->declarations; 400 401 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 402 i915_program_error(p, "Exceeded max nr indirect texture lookups"); 403 404 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 405 i915_program_error(p, "Exceeded max TEX instructions"); 406 407 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 408 i915_program_error(p, "Exceeded max ALU instructions"); 409 410 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 411 i915_program_error(p, "Exceeded max DECL instructions"); 412 413 p->declarations[0] |= program_size + decl_size - 2; 414} 415 416void i915_upload_program( i915ContextPtr i915, struct i915_fragment_program *p ) 417{ 418 GLuint program_size = p->csr - p->program; 419 GLuint decl_size = p->decl - p->declarations; 420 421 FALLBACK( &i915->intel, I915_FALLBACK_PROGRAM, p->error ); 422 423 /* Could just go straight to the batchbuffer from here: 424 */ 425 if (i915->state.ProgramSize != (program_size + decl_size) || 426 memcmp(i915->state.Program + decl_size, p->program, 427 program_size*sizeof(int)) != 0) { 428 I915_STATECHANGE( i915, I915_UPLOAD_PROGRAM ); 429 memcpy(i915->state.Program, p->declarations, decl_size*sizeof(int)); 430 memcpy(i915->state.Program + decl_size, p->program, 431 program_size*sizeof(int)); 432 i915->state.ProgramSize = decl_size + program_size; 433 } 434 435 /* Always seemed to get a failure if I used memcmp() to 436 * shortcircuit this state upload. Needs further investigation? 437 */ 438 if (p->nr_constants) { 439 GLuint nr = p->nr_constants; 440 441 I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 1 ); 442 I915_STATECHANGE( i915, I915_UPLOAD_CONSTANTS ); 443 444 i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4); 445 i915->state.Constant[1] = (1<<(nr-1)) | ((1<<(nr-1))-1); 446 447 memcpy(&i915->state.Constant[2], p->constant, 4*sizeof(int)*(nr)); 448 i915->state.ConstantSize = 2 + (nr) * 4; 449 450 if (0) { 451 GLuint i; 452 for (i = 0; i < nr; i++) { 453 fprintf(stderr, "const[%d]: %f %f %f %f\n", i, 454 p->constant[i][0], 455 p->constant[i][1], 456 p->constant[i][2], 457 p->constant[i][3]); 458 } 459 } 460 } 461 else { 462 I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 0 ); 463 } 464 465 p->on_hardware = 1; 466} 467