/* nvfx_vertprog.c revision a2fc42b899de22273c1df96091bfb5c636075cb0 */
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "util/u_linkage.h" 5#include "util/u_debug.h" 6 7#include "pipe/p_shader_tokens.h" 8#include "tgsi/tgsi_parse.h" 9#include "tgsi/tgsi_dump.h" 10#include "tgsi/tgsi_util.h" 11#include "tgsi/tgsi_ureg.h" 12 13#include "draw/draw_context.h" 14 15#include "nv30-40_3d.xml.h" 16#include "nv30_context.h" 17#include "nv30_resource.h" 18 19/* TODO (at least...): 20 * 1. Indexed consts + ARL 21 * 3. NV_vp11, NV_vp2, NV_vp3 features 22 * - extra arith opcodes 23 * - branching 24 * - texture sampling 25 * - indexed attribs 26 * - indexed results 27 * 4. bugs 28 */ 29 30#include "nv30_vertprog.h" 31#include "nv40_vertprog.h" 32 33struct nvfx_loop_entry { 34 unsigned brk_target; 35 unsigned cont_target; 36}; 37 38struct nvfx_vpc { 39 struct nv30_context* nv30; 40 struct pipe_shader_state pipe; 41 struct nv30_vertprog *vp; 42 struct tgsi_shader_info* info; 43 44 struct nv30_vertprog_exec *vpi; 45 46 unsigned r_temps; 47 unsigned r_temps_discard; 48 struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; 49 struct nvfx_reg *r_address; 50 struct nvfx_reg *r_temp; 51 struct nvfx_reg *r_const; 52 struct nvfx_reg r_0_1; 53 54 struct nvfx_reg *imm; 55 unsigned nr_imm; 56 57 int hpos_idx; 58 int cvtx_idx; 59 60 struct util_dynarray label_relocs; 61 struct util_dynarray loop_stack; 62}; 63 64static struct nvfx_reg 65temp(struct nvfx_vpc *vpc) 66{ 67 int idx = ffs(~vpc->r_temps) - 1; 68 69 if (idx < 0) { 70 NOUVEAU_ERR("out of temps!!\n"); 71 assert(0); 72 return nvfx_reg(NVFXSR_TEMP, 0); 73 } 74 75 vpc->r_temps |= (1 << idx); 76 vpc->r_temps_discard |= (1 << idx); 77 return nvfx_reg(NVFXSR_TEMP, idx); 78} 79 80static inline void 81release_temps(struct nvfx_vpc *vpc) 82{ 83 vpc->r_temps &= ~vpc->r_temps_discard; 84 vpc->r_temps_discard = 0; 85} 86 87static struct nvfx_reg 88constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) 89{ 90 struct nv30_vertprog *vp = vpc->vp; 91 
struct nv30_vertprog_data *vpd; 92 int idx; 93 94 if (pipe >= 0) { 95 for (idx = 0; idx < vp->nr_consts; idx++) { 96 if (vp->consts[idx].index == pipe) 97 return nvfx_reg(NVFXSR_CONST, idx); 98 } 99 } 100 101 idx = vp->nr_consts++; 102 vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); 103 vpd = &vp->consts[idx]; 104 105 vpd->index = pipe; 106 vpd->value[0] = x; 107 vpd->value[1] = y; 108 vpd->value[2] = z; 109 vpd->value[3] = w; 110 return nvfx_reg(NVFXSR_CONST, idx); 111} 112 113#define arith(s,t,o,d,m,s0,s1,s2) \ 114 nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2)) 115 116static void 117emit_src(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, 118 int pos, struct nvfx_src src) 119{ 120 struct nv30_vertprog *vp = vpc->vp; 121 uint32_t sr = 0; 122 struct nvfx_relocation reloc; 123 124 switch (src.reg.type) { 125 case NVFXSR_TEMP: 126 sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); 127 sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); 128 break; 129 case NVFXSR_INPUT: 130 sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << 131 NVFX_VP(SRC_REG_TYPE_SHIFT)); 132 vp->ir |= (1 << src.reg.index); 133 hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); 134 break; 135 case NVFXSR_CONST: 136 sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << 137 NVFX_VP(SRC_REG_TYPE_SHIFT)); 138 if (src.reg.index < 512) { 139 reloc.location = vp->nr_insns - 1; 140 reloc.target = src.reg.index; 141 util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc); 142 } else { 143 hw[1] |= (src.reg.index - 512) << NVFX_VP(INST_CONST_SRC_SHIFT); 144 } 145 break; 146 case NVFXSR_NONE: 147 sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << 148 NVFX_VP(SRC_REG_TYPE_SHIFT)); 149 break; 150 default: 151 assert(0); 152 } 153 154 if (src.negate) 155 sr |= NVFX_VP(SRC_NEGATE); 156 157 if (src.abs) 158 hw[0] |= (1 << (21 + pos)); 159 160 sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | 161 (src.swz[1] << 
NVFX_VP(SRC_SWZ_Y_SHIFT)) | 162 (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | 163 (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); 164 165 if(src.indirect) { 166 if(src.reg.type == NVFXSR_CONST) 167 hw[3] |= NVFX_VP(INST_INDEX_CONST); 168 else if(src.reg.type == NVFXSR_INPUT) 169 hw[0] |= NVFX_VP(INST_INDEX_INPUT); 170 else 171 assert(0); 172 if(src.indirect_reg) 173 hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1); 174 hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT); 175 } 176 177 switch (pos) { 178 case 0: 179 hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> 180 NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); 181 hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << 182 NVFX_VP(INST_SRC0L_SHIFT); 183 break; 184 case 1: 185 hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); 186 break; 187 case 2: 188 hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> 189 NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); 190 hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << 191 NVFX_VP(INST_SRC2L_SHIFT); 192 break; 193 default: 194 assert(0); 195 } 196} 197 198static void 199emit_dst(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, 200 int slot, struct nvfx_reg dst) 201{ 202 struct nv30_vertprog *vp = vpc->vp; 203 204 switch (dst.type) { 205 case NVFXSR_NONE: 206 if(!nv30->is_nv4x) 207 hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK; 208 else { 209 hw[3] |= NV40_VP_INST_DEST_MASK; 210 if (slot == 0) 211 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; 212 else 213 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 214 } 215 break; 216 case NVFXSR_TEMP: 217 if(!nv30->is_nv4x) 218 hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); 219 else { 220 hw[3] |= NV40_VP_INST_DEST_MASK; 221 if (slot == 0) 222 hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT); 223 else 224 hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT); 225 } 226 break; 227 case NVFXSR_OUTPUT: 228 /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ 229 if(nv30->is_nv4x) { 230 switch (dst.index) { 231 case 
NV30_VP_INST_DEST_CLP(0): 232 dst.index = NVFX_VP(INST_DEST_FOGC); 233 vp->or |= (1 << 6); 234 break; 235 case NV30_VP_INST_DEST_CLP(1): 236 dst.index = NVFX_VP(INST_DEST_FOGC); 237 vp->or |= (1 << 7); 238 break; 239 case NV30_VP_INST_DEST_CLP(2): 240 dst.index = NVFX_VP(INST_DEST_FOGC); 241 vp->or |= (1 << 8); 242 break; 243 case NV30_VP_INST_DEST_CLP(3): 244 dst.index = NVFX_VP(INST_DEST_PSZ); 245 vp->or |= (1 << 9); 246 break; 247 case NV30_VP_INST_DEST_CLP(4): 248 dst.index = NVFX_VP(INST_DEST_PSZ); 249 vp->or |= (1 << 10); 250 break; 251 case NV30_VP_INST_DEST_CLP(5): 252 dst.index = NVFX_VP(INST_DEST_PSZ); 253 vp->or |= (1 << 11); 254 break; 255 case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break; 256 case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break; 257 case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break; 258 case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break; 259 case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; 260 case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; 261 } 262 } 263 264 if(!nv30->is_nv4x) { 265 hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); 266 hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK; 267 268 /*XXX: no way this is entirely correct, someone needs to 269 * figure out what exactly it is. 
270 */ 271 hw[3] |= 0x800; 272 } else { 273 hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); 274 if (slot == 0) { 275 hw[0] |= NV40_VP_INST_VEC_RESULT; 276 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; 277 } else { 278 hw[3] |= NV40_VP_INST_SCA_RESULT; 279 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 280 } 281 } 282 break; 283 default: 284 assert(0); 285 } 286} 287 288static void 289nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn) 290{ 291 struct nv30_context *nv30 = vpc->nv30; 292 struct nv30_vertprog *vp = vpc->vp; 293 unsigned slot = insn.op >> 7; 294 unsigned op = insn.op & 0x7f; 295 uint32_t *hw; 296 297 vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); 298 vpc->vpi = &vp->insns[vp->nr_insns - 1]; 299 memset(vpc->vpi, 0, sizeof(*vpc->vpi)); 300 301 hw = vpc->vpi->data; 302 303 if (insn.cc_test != NVFX_COND_TR) 304 hw[0] |= NVFX_VP(INST_COND_TEST_ENABLE); 305 hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT)); 306 hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | 307 (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | 308 (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | 309 (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT))); 310 if(insn.cc_update) 311 hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE); 312 313 if(insn.sat) { 314 assert(nv30->use_nv4x); 315 if(nv30->use_nv4x) 316 hw[0] |= NV40_VP_INST_SATURATE; 317 } 318 319 if(!nv30->is_nv4x) { 320 if(slot == 0) 321 hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); 322 else { 323 hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); 324 hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); 325 } 326// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); 327// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); 328 329 if (insn.dst.type == NVFXSR_OUTPUT) { 330 if (slot) 331 hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); 332 else 333 hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); 334 } else { 335 if (slot) 336 hw[3] |= (insn.mask << 
NV30_VP_INST_STEMP_WRITEMASK_SHIFT); 337 else 338 hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); 339 } 340 } else { 341 if (slot == 0) { 342 hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); 343 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 344 hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); 345 } else { 346 hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); 347 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK ; 348 hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); 349 } 350 } 351 352 emit_dst(nv30, vpc, hw, slot, insn.dst); 353 emit_src(nv30, vpc, hw, 0, insn.src[0]); 354 emit_src(nv30, vpc, hw, 1, insn.src[1]); 355 emit_src(nv30, vpc, hw, 2, insn.src[2]); 356 357// if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL) 358// hw[3] |= NV40_VP_INST_SCA_RESULT; 359} 360 361static inline struct nvfx_src 362tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { 363 struct nvfx_src src; 364 365 switch (fsrc->Register.File) { 366 case TGSI_FILE_INPUT: 367 src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index); 368 break; 369 case TGSI_FILE_CONSTANT: 370 src.reg = vpc->r_const[fsrc->Register.Index]; 371 break; 372 case TGSI_FILE_IMMEDIATE: 373 src.reg = vpc->imm[fsrc->Register.Index]; 374 break; 375 case TGSI_FILE_TEMPORARY: 376 src.reg = vpc->r_temp[fsrc->Register.Index]; 377 break; 378 default: 379 NOUVEAU_ERR("bad src file\n"); 380 src.reg.index = 0; 381 src.reg.type = -1; 382 break; 383 } 384 385 src.abs = fsrc->Register.Absolute; 386 src.negate = fsrc->Register.Negate; 387 src.swz[0] = fsrc->Register.SwizzleX; 388 src.swz[1] = fsrc->Register.SwizzleY; 389 src.swz[2] = fsrc->Register.SwizzleZ; 390 src.swz[3] = fsrc->Register.SwizzleW; 391 src.indirect = 0; 392 src.indirect_reg = 0; 393 src.indirect_swz = 0; 394 395 if(fsrc->Register.Indirect) { 396 if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && 397 (fsrc->Register.File == TGSI_FILE_CONSTANT || 398 fsrc->Register.File == TGSI_FILE_INPUT)) { 399 src.indirect = 1; 400 
src.indirect_reg = fsrc->Indirect.Index; 401 src.indirect_swz = fsrc->Indirect.SwizzleX; 402 } else { 403 src.reg.index = 0; 404 src.reg.type = -1; 405 } 406 } 407 408 return src; 409} 410 411static INLINE struct nvfx_reg 412tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { 413 struct nvfx_reg dst; 414 415 switch (fdst->Register.File) { 416 case TGSI_FILE_NULL: 417 dst = nvfx_reg(NVFXSR_NONE, 0); 418 break; 419 case TGSI_FILE_OUTPUT: 420 dst = vpc->r_result[fdst->Register.Index]; 421 break; 422 case TGSI_FILE_TEMPORARY: 423 dst = vpc->r_temp[fdst->Register.Index]; 424 break; 425 case TGSI_FILE_ADDRESS: 426 dst = vpc->r_address[fdst->Register.Index]; 427 break; 428 default: 429 NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File); 430 dst.index = 0; 431 dst.type = 0; 432 break; 433 } 434 435 return dst; 436} 437 438static inline int 439tgsi_mask(uint tgsi) 440{ 441 int mask = 0; 442 443 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X; 444 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y; 445 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z; 446 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W; 447 return mask; 448} 449 450static boolean 451nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc, 452 unsigned idx, const struct tgsi_full_instruction *finst) 453{ 454 struct nvfx_src src[3], tmp; 455 struct nvfx_reg dst; 456 struct nvfx_reg final_dst; 457 struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 458 struct nvfx_insn insn; 459 struct nvfx_relocation reloc; 460 struct nvfx_loop_entry loop; 461 boolean sat = FALSE; 462 int mask; 463 int ai = -1, ci = -1, ii = -1; 464 int i; 465 unsigned sub_depth = 0; 466 467 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 468 const struct tgsi_full_src_register *fsrc; 469 470 fsrc = &finst->Src[i]; 471 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { 472 src[i] = tgsi_src(vpc, fsrc); 473 } 474 } 475 476 for (i = 0; i < finst->Instruction.NumSrcRegs; 
i++) { 477 const struct tgsi_full_src_register *fsrc; 478 479 fsrc = &finst->Src[i]; 480 481 switch (fsrc->Register.File) { 482 case TGSI_FILE_INPUT: 483 if (ai == -1 || ai == fsrc->Register.Index) { 484 ai = fsrc->Register.Index; 485 src[i] = tgsi_src(vpc, fsrc); 486 } else { 487 src[i] = nvfx_src(temp(vpc)); 488 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 489 tgsi_src(vpc, fsrc), none, none)); 490 } 491 break; 492 case TGSI_FILE_CONSTANT: 493 if ((ci == -1 && ii == -1) || 494 ci == fsrc->Register.Index) { 495 ci = fsrc->Register.Index; 496 src[i] = tgsi_src(vpc, fsrc); 497 } else { 498 src[i] = nvfx_src(temp(vpc)); 499 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 500 tgsi_src(vpc, fsrc), none, none)); 501 } 502 break; 503 case TGSI_FILE_IMMEDIATE: 504 if ((ci == -1 && ii == -1) || 505 ii == fsrc->Register.Index) { 506 ii = fsrc->Register.Index; 507 src[i] = tgsi_src(vpc, fsrc); 508 } else { 509 src[i] = nvfx_src(temp(vpc)); 510 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 511 tgsi_src(vpc, fsrc), none, none)); 512 } 513 break; 514 case TGSI_FILE_TEMPORARY: 515 /* handled above */ 516 break; 517 default: 518 NOUVEAU_ERR("bad src file\n"); 519 return FALSE; 520 } 521 } 522 523 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 524 if(src[i].reg.type < 0) 525 return FALSE; 526 } 527 528 if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS && 529 finst->Instruction.Opcode != TGSI_OPCODE_ARL) 530 return FALSE; 531 532 final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]); 533 mask = tgsi_mask(finst->Dst[0].Register.WriteMask); 534 if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { 535 assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL); 536 if (nv30->use_nv4x) 537 sat = TRUE; 538 else 539 if(dst.type != NVFXSR_TEMP) 540 dst = temp(vpc); 541 } 542 543 switch (finst->Instruction.Opcode) { 544 case TGSI_OPCODE_ABS: 545 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none)); 546 
break; 547 case TGSI_OPCODE_ADD: 548 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1])); 549 break; 550 case TGSI_OPCODE_ARL: 551 nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none)); 552 break; 553 case TGSI_OPCODE_CMP: 554 insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); 555 insn.cc_update = 1; 556 nvfx_vp_emit(vpc, insn); 557 558 insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none); 559 insn.cc_test = NVFX_COND_GE; 560 nvfx_vp_emit(vpc, insn); 561 562 insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none); 563 insn.cc_test = NVFX_COND_LT; 564 nvfx_vp_emit(vpc, insn); 565 break; 566 case TGSI_OPCODE_COS: 567 nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0])); 568 break; 569 case TGSI_OPCODE_DP2: 570 tmp = nvfx_src(temp(vpc)); 571 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none)); 572 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y))); 573 break; 574 case TGSI_OPCODE_DP3: 575 nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none)); 576 break; 577 case TGSI_OPCODE_DP4: 578 nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none)); 579 break; 580 case TGSI_OPCODE_DPH: 581 nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none)); 582 break; 583 case TGSI_OPCODE_DST: 584 nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none)); 585 break; 586 case TGSI_OPCODE_EX2: 587 nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0])); 588 break; 589 case TGSI_OPCODE_EXP: 590 nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0])); 591 break; 592 case TGSI_OPCODE_FLR: 593 nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none)); 594 break; 595 case TGSI_OPCODE_FRC: 596 nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none)); 597 break; 598 case TGSI_OPCODE_LG2: 599 
nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0])); 600 break; 601 case TGSI_OPCODE_LIT: 602 nvfx_vp_emit(vpc, arith(sat, SCA, LIT, dst, mask, none, none, src[0])); 603 break; 604 case TGSI_OPCODE_LOG: 605 nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0])); 606 break; 607 case TGSI_OPCODE_LRP: 608 tmp = nvfx_src(temp(vpc)); 609 nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); 610 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp)); 611 break; 612 case TGSI_OPCODE_MAD: 613 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2])); 614 break; 615 case TGSI_OPCODE_MAX: 616 nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none)); 617 break; 618 case TGSI_OPCODE_MIN: 619 nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none)); 620 break; 621 case TGSI_OPCODE_MOV: 622 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none)); 623 break; 624 case TGSI_OPCODE_MUL: 625 nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none)); 626 break; 627 case TGSI_OPCODE_NOP: 628 break; 629 case TGSI_OPCODE_POW: 630 tmp = nvfx_src(temp(vpc)); 631 nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X))); 632 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); 633 nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X))); 634 break; 635 case TGSI_OPCODE_RCP: 636 nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0])); 637 break; 638 case TGSI_OPCODE_RSQ: 639 nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0]))); 640 break; 641 case TGSI_OPCODE_SEQ: 642 nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none)); 643 break; 644 case TGSI_OPCODE_SFL: 645 nvfx_vp_emit(vpc, arith(sat, VEC, SFL, dst, mask, src[0], src[1], none)); 646 break; 
647 case TGSI_OPCODE_SGE: 648 nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none)); 649 break; 650 case TGSI_OPCODE_SGT: 651 nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none)); 652 break; 653 case TGSI_OPCODE_SIN: 654 nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0])); 655 break; 656 case TGSI_OPCODE_SLE: 657 nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none)); 658 break; 659 case TGSI_OPCODE_SLT: 660 nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none)); 661 break; 662 case TGSI_OPCODE_SNE: 663 nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none)); 664 break; 665 case TGSI_OPCODE_SSG: 666 nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], src[1], none)); 667 break; 668 case TGSI_OPCODE_STR: 669 nvfx_vp_emit(vpc, arith(sat, VEC, STR, dst, mask, src[0], src[1], none)); 670 break; 671 case TGSI_OPCODE_SUB: 672 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1]))); 673 break; 674 case TGSI_OPCODE_TRUNC: 675 tmp = nvfx_src(temp(vpc)); 676 insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); 677 insn.cc_update = 1; 678 nvfx_vp_emit(vpc, insn); 679 680 nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none)); 681 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none)); 682 683 insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none); 684 insn.cc_test = NVFX_COND_LT; 685 nvfx_vp_emit(vpc, insn); 686 break; 687 case TGSI_OPCODE_XPD: 688 tmp = nvfx_src(temp(vpc)); 689 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none)); 690 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp))); 691 break; 692 case TGSI_OPCODE_IF: 693 insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none); 694 insn.cc_update = 1; 695 nvfx_vp_emit(vpc, insn); 696 
697 reloc.location = vpc->vp->nr_insns; 698 reloc.target = finst->Label.Label + 1; 699 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 700 701 insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); 702 insn.cc_test = NVFX_COND_EQ; 703 insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0; 704 nvfx_vp_emit(vpc, insn); 705 break; 706 case TGSI_OPCODE_ELSE: 707 case TGSI_OPCODE_BRA: 708 case TGSI_OPCODE_CAL: 709 reloc.location = vpc->vp->nr_insns; 710 reloc.target = finst->Label.Label; 711 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 712 713 if(finst->Instruction.Opcode == TGSI_OPCODE_CAL) 714 insn = arith(0, SCA, CAL, none.reg, 0, none, none, none); 715 else 716 insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); 717 nvfx_vp_emit(vpc, insn); 718 break; 719 case TGSI_OPCODE_RET: 720 if(sub_depth || !vpc->vp->enabled_ucps) { 721 tmp = none; 722 tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0; 723 nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp)); 724 } else { 725 reloc.location = vpc->vp->nr_insns; 726 reloc.target = vpc->info->num_instructions; 727 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 728 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 729 } 730 break; 731 case TGSI_OPCODE_BGNSUB: 732 ++sub_depth; 733 break; 734 case TGSI_OPCODE_ENDSUB: 735 --sub_depth; 736 break; 737 case TGSI_OPCODE_ENDIF: 738 /* nothing to do here */ 739 break; 740 case TGSI_OPCODE_BGNLOOP: 741 loop.cont_target = idx; 742 loop.brk_target = finst->Label.Label + 1; 743 util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop); 744 break; 745 case TGSI_OPCODE_ENDLOOP: 746 loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry); 747 748 reloc.location = vpc->vp->nr_insns; 749 reloc.target = loop.cont_target; 750 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 751 752 nvfx_vp_emit(vpc, arith(0, SCA, 
BRA, none.reg, 0, none, none, none)); 753 break; 754 case TGSI_OPCODE_CONT: 755 loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); 756 757 reloc.location = vpc->vp->nr_insns; 758 reloc.target = loop.cont_target; 759 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 760 761 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 762 break; 763 case TGSI_OPCODE_BRK: 764 loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); 765 766 reloc.location = vpc->vp->nr_insns; 767 reloc.target = loop.brk_target; 768 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 769 770 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 771 break; 772 case TGSI_OPCODE_END: 773 assert(!sub_depth); 774 if(vpc->vp->enabled_ucps) { 775 if(idx != (vpc->info->num_instructions - 1)) { 776 reloc.location = vpc->vp->nr_insns; 777 reloc.target = vpc->info->num_instructions; 778 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 779 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 780 } 781 } else { 782 if(vpc->vp->nr_insns) 783 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 784 nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none)); 785 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 786 } 787 break; 788 default: 789 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); 790 return FALSE; 791 } 792 793 if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !nv30->use_nv4x) { 794 if (!vpc->r_0_1.type) 795 vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0); 796 nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none)); 797 nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none)); 798 } 799 800 release_temps(vpc); 801 return TRUE; 802} 803 804static boolean 
805nvfx_vertprog_parse_decl_output(struct nv30_context *nv30, struct nvfx_vpc *vpc, 806 const struct tgsi_full_declaration *fdec) 807{ 808 unsigned num_texcoords = nv30->is_nv4x ? 10 : 8; 809 unsigned idx = fdec->Range.First; 810 int hw = 0, i; 811 812 switch (fdec->Semantic.Name) { 813 case TGSI_SEMANTIC_POSITION: 814 hw = NVFX_VP(INST_DEST_POS); 815 vpc->hpos_idx = idx; 816 break; 817 case TGSI_SEMANTIC_CLIPVERTEX: 818 vpc->r_result[idx] = temp(vpc); 819 vpc->r_temps_discard = 0; 820 vpc->cvtx_idx = idx; 821 return TRUE; 822 case TGSI_SEMANTIC_COLOR: 823 if (fdec->Semantic.Index == 0) { 824 hw = NVFX_VP(INST_DEST_COL0); 825 } else 826 if (fdec->Semantic.Index == 1) { 827 hw = NVFX_VP(INST_DEST_COL1); 828 } else { 829 NOUVEAU_ERR("bad colour semantic index\n"); 830 return FALSE; 831 } 832 break; 833 case TGSI_SEMANTIC_BCOLOR: 834 if (fdec->Semantic.Index == 0) { 835 hw = NVFX_VP(INST_DEST_BFC0); 836 } else 837 if (fdec->Semantic.Index == 1) { 838 hw = NVFX_VP(INST_DEST_BFC1); 839 } else { 840 NOUVEAU_ERR("bad bcolour semantic index\n"); 841 return FALSE; 842 } 843 break; 844 case TGSI_SEMANTIC_FOG: 845 hw = NVFX_VP(INST_DEST_FOGC); 846 break; 847 case TGSI_SEMANTIC_PSIZE: 848 hw = NVFX_VP(INST_DEST_PSZ); 849 break; 850 case TGSI_SEMANTIC_GENERIC: 851 for (i = 0; i < num_texcoords; i++) { 852 if (vpc->vp->texcoord[i] == fdec->Semantic.Index) { 853 hw = NVFX_VP(INST_DEST_TC(i)); 854 break; 855 } 856 } 857 858 if (i == num_texcoords) { 859 vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0); 860 return TRUE; 861 } 862 break; 863 case TGSI_SEMANTIC_EDGEFLAG: 864 /* not really an error just a fallback */ 865 NOUVEAU_ERR("cannot handle edgeflag output\n"); 866 return FALSE; 867 default: 868 NOUVEAU_ERR("bad output semantic\n"); 869 return FALSE; 870 } 871 872 vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); 873 return TRUE; 874} 875 876static boolean 877nvfx_vertprog_prepare(struct nv30_context *nv30, struct nvfx_vpc *vpc) 878{ 879 struct tgsi_parse_context p; 880 int 
high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i; 881 882 tgsi_parse_init(&p, vpc->pipe.tokens); 883 while (!tgsi_parse_end_of_tokens(&p)) { 884 const union tgsi_full_token *tok = &p.FullToken; 885 886 tgsi_parse_token(&p); 887 switch(tok->Token.Type) { 888 case TGSI_TOKEN_TYPE_IMMEDIATE: 889 nr_imm++; 890 break; 891 case TGSI_TOKEN_TYPE_DECLARATION: 892 { 893 const struct tgsi_full_declaration *fdec; 894 895 fdec = &p.FullToken.FullDeclaration; 896 switch (fdec->Declaration.File) { 897 case TGSI_FILE_TEMPORARY: 898 if (fdec->Range.Last > high_temp) { 899 high_temp = 900 fdec->Range.Last; 901 } 902 break; 903 case TGSI_FILE_ADDRESS: 904 if (fdec->Range.Last > high_addr) { 905 high_addr = 906 fdec->Range.Last; 907 } 908 break; 909 case TGSI_FILE_CONSTANT: 910 if (fdec->Range.Last > high_const) { 911 high_const = 912 fdec->Range.Last; 913 } 914 break; 915 case TGSI_FILE_OUTPUT: 916 if (!nvfx_vertprog_parse_decl_output(nv30, vpc, fdec)) 917 return FALSE; 918 break; 919 default: 920 break; 921 } 922 } 923 break; 924 default: 925 break; 926 } 927 } 928 tgsi_parse_free(&p); 929 930 if (nr_imm) { 931 vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg)); 932 assert(vpc->imm); 933 } 934 935 if (++high_temp) { 936 vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); 937 for (i = 0; i < high_temp; i++) 938 vpc->r_temp[i] = temp(vpc); 939 } 940 941 if (++high_addr) { 942 vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg)); 943 for (i = 0; i < high_addr; i++) 944 vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i); 945 } 946 947 if(++high_const) { 948 vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg)); 949 for (i = 0; i < high_const; i++) 950 vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0); 951 } 952 953 vpc->r_temps_discard = 0; 954 return TRUE; 955} 956 957DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) 958 959boolean 960_nvfx_vertprog_translate(struct nv30_context *nv30, struct nv30_vertprog *vp) 961{ 962 struct tgsi_parse_context 
parse; 963 struct nvfx_vpc *vpc = NULL; 964 struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 965 struct util_dynarray insns; 966 int i, ucps; 967 968 vp->translated = FALSE; 969 vp->nr_insns = 0; 970 vp->nr_consts = 0; 971 972 vpc = CALLOC_STRUCT(nvfx_vpc); 973 if (!vpc) 974 return FALSE; 975 vpc->nv30 = nv30; 976 vpc->vp = vp; 977 vpc->pipe = vp->pipe; 978 vpc->info = &vp->info; 979 vpc->cvtx_idx = -1; 980 981 if (!nvfx_vertprog_prepare(nv30, vpc)) { 982 FREE(vpc); 983 return FALSE; 984 } 985 986 /* Redirect post-transform vertex position to a temp if user clip 987 * planes are enabled. We need to append code to the vtxprog 988 * to handle clip planes later. 989 */ 990 if (vp->enabled_ucps && vpc->cvtx_idx < 0) { 991 vpc->r_result[vpc->hpos_idx] = temp(vpc); 992 vpc->r_temps_discard = 0; 993 vpc->cvtx_idx = vpc->hpos_idx; 994 } 995 996 util_dynarray_init(&insns); 997 998 tgsi_parse_init(&parse, vp->pipe.tokens); 999 while (!tgsi_parse_end_of_tokens(&parse)) { 1000 tgsi_parse_token(&parse); 1001 1002 switch (parse.FullToken.Token.Type) { 1003 case TGSI_TOKEN_TYPE_IMMEDIATE: 1004 { 1005 const struct tgsi_full_immediate *imm; 1006 1007 imm = &parse.FullToken.FullImmediate; 1008 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 1009 assert(imm->Immediate.NrTokens == 4 + 1); 1010 vpc->imm[vpc->nr_imm++] = 1011 constant(vpc, -1, 1012 imm->u[0].Float, 1013 imm->u[1].Float, 1014 imm->u[2].Float, 1015 imm->u[3].Float); 1016 } 1017 break; 1018 case TGSI_TOKEN_TYPE_INSTRUCTION: 1019 { 1020 const struct tgsi_full_instruction *finst; 1021 unsigned idx = insns.size >> 2; 1022 util_dynarray_append(&insns, unsigned, vp->nr_insns); 1023 finst = &parse.FullToken.FullInstruction; 1024 if (!nvfx_vertprog_parse_instruction(nv30, vpc, idx, finst)) 1025 goto out; 1026 } 1027 break; 1028 default: 1029 break; 1030 } 1031 } 1032 1033 util_dynarray_append(&insns, unsigned, vp->nr_insns); 1034 1035 for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct 
nvfx_relocation)) 1036 { 1037 struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i); 1038 struct nvfx_relocation hw_reloc; 1039 1040 hw_reloc.location = label_reloc->location; 1041 hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target]; 1042 1043 //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target); 1044 1045 util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc); 1046 } 1047 util_dynarray_fini(&insns); 1048 util_dynarray_trim(&vp->branch_relocs); 1049 1050 /* XXX: what if we add a RET before?! make sure we jump here...*/ 1051 1052 /* Write out HPOS if it was redirected to a temp earlier */ 1053 if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { 1054 struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT, 1055 NVFX_VP(INST_DEST_POS)); 1056 struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); 1057 1058 nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none)); 1059 } 1060 1061 /* Insert code to handle user clip planes */ 1062 ucps = vp->enabled_ucps; 1063 while (ucps) { 1064 int i = ffs(ucps) - 1; ucps &= ~(1 << i); 1065 struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i)); 1066 struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, 512 + i)); 1067 struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->cvtx_idx]); 1068 unsigned mask; 1069 1070 if(nv30->is_nv4x) 1071 { 1072 switch (i) { 1073 case 0: case 3: mask = NVFX_VP_MASK_Y; break; 1074 case 1: case 4: mask = NVFX_VP_MASK_Z; break; 1075 case 2: case 5: mask = NVFX_VP_MASK_W; break; 1076 default: 1077 NOUVEAU_ERR("invalid clip dist #%d\n", i); 1078 goto out; 1079 } 1080 } 1081 else 1082 mask = NVFX_VP_MASK_X; 1083 1084 nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none)); 1085 } 1086 1087 if (vpc->vp->nr_insns) 1088 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 1089 1090 if(debug_get_option_nvfx_dump_vp()) 1091 
{ 1092 debug_printf("\n"); 1093 tgsi_dump(vpc->pipe.tokens, 0); 1094 1095 debug_printf("\n%s vertex program:\n", nv30->is_nv4x ? "nv4x" : "nv3x"); 1096 for (i = 0; i < vp->nr_insns; i++) 1097 debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); 1098 debug_printf("\n"); 1099 } 1100 1101 vp->translated = TRUE; 1102 1103out: 1104 tgsi_parse_free(&parse); 1105 if(vpc) { 1106 util_dynarray_fini(&vpc->label_relocs); 1107 util_dynarray_fini(&vpc->loop_stack); 1108 FREE(vpc->r_temp); 1109 FREE(vpc->r_address); 1110 FREE(vpc->r_const); 1111 FREE(vpc->imm); 1112 FREE(vpc); 1113 } 1114 1115 return vp->translated; 1116} 1117