/* nvfx_vertprog.c revision 897d2351322e4c516eee622b3f49eedca7a2e308 */
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "util/u_linkage.h" 5#include "util/u_debug.h" 6 7#include "pipe/p_shader_tokens.h" 8#include "tgsi/tgsi_parse.h" 9#include "tgsi/tgsi_dump.h" 10#include "tgsi/tgsi_util.h" 11#include "tgsi/tgsi_ureg.h" 12 13#include "draw/draw_context.h" 14 15#include "nv30-40_3d.xml.h" 16#include "nv30_context.h" 17#include "nv30_resource.h" 18 19/* TODO (at least...): 20 * 1. Indexed consts + ARL 21 * 3. NV_vp11, NV_vp2, NV_vp3 features 22 * - extra arith opcodes 23 * - branching 24 * - texture sampling 25 * - indexed attribs 26 * - indexed results 27 * 4. bugs 28 */ 29 30#include "nv30_vertprog.h" 31#include "nv40_vertprog.h" 32 33struct nvfx_loop_entry { 34 unsigned brk_target; 35 unsigned cont_target; 36}; 37 38struct nvfx_vpc { 39 struct nv30_context* nv30; 40 struct pipe_shader_state pipe; 41 struct nv30_vertprog *vp; 42 struct tgsi_shader_info* info; 43 44 struct nv30_vertprog_exec *vpi; 45 46 unsigned r_temps; 47 unsigned r_temps_discard; 48 struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; 49 struct nvfx_reg *r_address; 50 struct nvfx_reg *r_temp; 51 struct nvfx_reg *r_const; 52 struct nvfx_reg r_0_1; 53 54 struct nvfx_reg *imm; 55 unsigned nr_imm; 56 57 int hpos_idx; 58 int cvtx_idx; 59 60 struct util_dynarray label_relocs; 61 struct util_dynarray loop_stack; 62}; 63 64static struct nvfx_reg 65temp(struct nvfx_vpc *vpc) 66{ 67 int idx = ffs(~vpc->r_temps) - 1; 68 69 if (idx < 0) { 70 NOUVEAU_ERR("out of temps!!\n"); 71 assert(0); 72 return nvfx_reg(NVFXSR_TEMP, 0); 73 } 74 75 vpc->r_temps |= (1 << idx); 76 vpc->r_temps_discard |= (1 << idx); 77 return nvfx_reg(NVFXSR_TEMP, idx); 78} 79 80static inline void 81release_temps(struct nvfx_vpc *vpc) 82{ 83 vpc->r_temps &= ~vpc->r_temps_discard; 84 vpc->r_temps_discard = 0; 85} 86 87static struct nvfx_reg 88constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) 89{ 90 struct nv30_vertprog *vp = vpc->vp; 91 
struct nv30_vertprog_data *vpd; 92 int idx; 93 94 if (pipe >= 0) { 95 for (idx = 0; idx < vp->nr_consts; idx++) { 96 if (vp->consts[idx].index == pipe) 97 return nvfx_reg(NVFXSR_CONST, idx); 98 } 99 } 100 101 idx = vp->nr_consts++; 102 vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); 103 vpd = &vp->consts[idx]; 104 105 vpd->index = pipe; 106 vpd->value[0] = x; 107 vpd->value[1] = y; 108 vpd->value[2] = z; 109 vpd->value[3] = w; 110 return nvfx_reg(NVFXSR_CONST, idx); 111} 112 113#define arith(s,t,o,d,m,s0,s1,s2) \ 114 nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2)) 115 116static void 117emit_src(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, 118 int pos, struct nvfx_src src) 119{ 120 struct nv30_vertprog *vp = vpc->vp; 121 uint32_t sr = 0; 122 struct nvfx_relocation reloc; 123 124 switch (src.reg.type) { 125 case NVFXSR_TEMP: 126 sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); 127 sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); 128 break; 129 case NVFXSR_INPUT: 130 sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << 131 NVFX_VP(SRC_REG_TYPE_SHIFT)); 132 vp->ir |= (1 << src.reg.index); 133 hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); 134 break; 135 case NVFXSR_CONST: 136 sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << 137 NVFX_VP(SRC_REG_TYPE_SHIFT)); 138 if (src.reg.index < 512) { 139 reloc.location = vp->nr_insns - 1; 140 reloc.target = src.reg.index; 141 util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc); 142 } else { 143 hw[1] |= (src.reg.index - 512) << NVFX_VP(INST_CONST_SRC_SHIFT); 144 } 145 break; 146 case NVFXSR_NONE: 147 sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << 148 NVFX_VP(SRC_REG_TYPE_SHIFT)); 149 break; 150 default: 151 assert(0); 152 } 153 154 if (src.negate) 155 sr |= NVFX_VP(SRC_NEGATE); 156 157 if (src.abs) 158 hw[0] |= (1 << (21 + pos)); 159 160 sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | 161 (src.swz[1] << 
NVFX_VP(SRC_SWZ_Y_SHIFT)) | 162 (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | 163 (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); 164 165 if(src.indirect) { 166 if(src.reg.type == NVFXSR_CONST) 167 hw[3] |= NVFX_VP(INST_INDEX_CONST); 168 else if(src.reg.type == NVFXSR_INPUT) 169 hw[0] |= NVFX_VP(INST_INDEX_INPUT); 170 else 171 assert(0); 172 if(src.indirect_reg) 173 hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1); 174 hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT); 175 } 176 177 switch (pos) { 178 case 0: 179 hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> 180 NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); 181 hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << 182 NVFX_VP(INST_SRC0L_SHIFT); 183 break; 184 case 1: 185 hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); 186 break; 187 case 2: 188 hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> 189 NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); 190 hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << 191 NVFX_VP(INST_SRC2L_SHIFT); 192 break; 193 default: 194 assert(0); 195 } 196} 197 198static void 199emit_dst(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw, 200 int slot, struct nvfx_reg dst) 201{ 202 struct nv30_vertprog *vp = vpc->vp; 203 204 switch (dst.type) { 205 case NVFXSR_NONE: 206 if(!nv30->is_nv4x) 207 hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK; 208 else { 209 hw[3] |= NV40_VP_INST_DEST_MASK; 210 if (slot == 0) 211 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; 212 else 213 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 214 } 215 break; 216 case NVFXSR_TEMP: 217 if(!nv30->is_nv4x) 218 hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); 219 else { 220 hw[3] |= NV40_VP_INST_DEST_MASK; 221 if (slot == 0) 222 hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT); 223 else 224 hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT); 225 } 226 break; 227 case NVFXSR_OUTPUT: 228 /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ 229 if(nv30->is_nv4x) { 230 switch (dst.index) { 231 case 
NV30_VP_INST_DEST_CLP(0): 232 dst.index = NVFX_VP(INST_DEST_FOGC); 233 vp->or |= (1 << 6); 234 break; 235 case NV30_VP_INST_DEST_CLP(1): 236 dst.index = NVFX_VP(INST_DEST_FOGC); 237 vp->or |= (1 << 7); 238 break; 239 case NV30_VP_INST_DEST_CLP(2): 240 dst.index = NVFX_VP(INST_DEST_FOGC); 241 vp->or |= (1 << 8); 242 break; 243 case NV30_VP_INST_DEST_CLP(3): 244 dst.index = NVFX_VP(INST_DEST_PSZ); 245 vp->or |= (1 << 9); 246 break; 247 case NV30_VP_INST_DEST_CLP(4): 248 dst.index = NVFX_VP(INST_DEST_PSZ); 249 vp->or |= (1 << 10); 250 break; 251 case NV30_VP_INST_DEST_CLP(5): 252 dst.index = NVFX_VP(INST_DEST_PSZ); 253 vp->or |= (1 << 11); 254 break; 255 case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break; 256 case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break; 257 case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break; 258 case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break; 259 case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; 260 case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; 261 } 262 } 263 264 if(!nv30->is_nv4x) { 265 hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); 266 hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK; 267 268 /*XXX: no way this is entirely correct, someone needs to 269 * figure out what exactly it is. 
270 */ 271 hw[3] |= 0x800; 272 } else { 273 hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); 274 if (slot == 0) { 275 hw[0] |= NV40_VP_INST_VEC_RESULT; 276 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; 277 } else { 278 hw[3] |= NV40_VP_INST_SCA_RESULT; 279 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 280 } 281 } 282 break; 283 default: 284 assert(0); 285 } 286} 287 288static void 289nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn) 290{ 291 struct nv30_context *nv30 = vpc->nv30; 292 struct nv30_vertprog *vp = vpc->vp; 293 unsigned slot = insn.op >> 7; 294 unsigned op = insn.op & 0x7f; 295 uint32_t *hw; 296 297 vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); 298 vpc->vpi = &vp->insns[vp->nr_insns - 1]; 299 memset(vpc->vpi, 0, sizeof(*vpc->vpi)); 300 301 hw = vpc->vpi->data; 302 303 if (insn.cc_test != NVFX_COND_TR) 304 hw[0] |= NVFX_VP(INST_COND_TEST_ENABLE); 305 hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT)); 306 hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | 307 (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | 308 (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | 309 (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT))); 310 if(insn.cc_update) 311 hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE); 312 313 if(insn.sat) { 314 assert(nv30->use_nv4x); 315 if(nv30->use_nv4x) 316 hw[0] |= NV40_VP_INST_SATURATE; 317 } 318 319 if(!nv30->is_nv4x) { 320 if(slot == 0) 321 hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); 322 else { 323 hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); 324 hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); 325 } 326// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); 327// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); 328 329 if (insn.dst.type == NVFXSR_OUTPUT) { 330 if (slot) 331 hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); 332 else 333 hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); 334 } else { 335 if (slot) 336 hw[3] |= (insn.mask << 
NV30_VP_INST_STEMP_WRITEMASK_SHIFT); 337 else 338 hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); 339 } 340 } else { 341 if (slot == 0) { 342 hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); 343 hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; 344 hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); 345 } else { 346 hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); 347 hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK ; 348 hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); 349 } 350 } 351 352 emit_dst(nv30, vpc, hw, slot, insn.dst); 353 emit_src(nv30, vpc, hw, 0, insn.src[0]); 354 emit_src(nv30, vpc, hw, 1, insn.src[1]); 355 emit_src(nv30, vpc, hw, 2, insn.src[2]); 356 357// if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL) 358// hw[3] |= NV40_VP_INST_SCA_RESULT; 359} 360 361static inline struct nvfx_src 362tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { 363 struct nvfx_src src; 364 365 switch (fsrc->Register.File) { 366 case TGSI_FILE_INPUT: 367 src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index); 368 break; 369 case TGSI_FILE_CONSTANT: 370 src.reg = vpc->r_const[fsrc->Register.Index]; 371 break; 372 case TGSI_FILE_IMMEDIATE: 373 src.reg = vpc->imm[fsrc->Register.Index]; 374 break; 375 case TGSI_FILE_TEMPORARY: 376 src.reg = vpc->r_temp[fsrc->Register.Index]; 377 break; 378 default: 379 NOUVEAU_ERR("bad src file\n"); 380 src.reg.index = 0; 381 src.reg.type = -1; 382 break; 383 } 384 385 src.abs = fsrc->Register.Absolute; 386 src.negate = fsrc->Register.Negate; 387 src.swz[0] = fsrc->Register.SwizzleX; 388 src.swz[1] = fsrc->Register.SwizzleY; 389 src.swz[2] = fsrc->Register.SwizzleZ; 390 src.swz[3] = fsrc->Register.SwizzleW; 391 src.indirect = 0; 392 src.indirect_reg = 0; 393 src.indirect_swz = 0; 394 395 if(fsrc->Register.Indirect) { 396 if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && 397 (fsrc->Register.File == TGSI_FILE_CONSTANT || 398 fsrc->Register.File == TGSI_FILE_INPUT)) { 399 src.indirect = 1; 400 
src.indirect_reg = fsrc->Indirect.Index; 401 src.indirect_swz = fsrc->Indirect.SwizzleX; 402 } else { 403 src.reg.index = 0; 404 src.reg.type = -1; 405 } 406 } 407 408 return src; 409} 410 411static INLINE struct nvfx_reg 412tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { 413 struct nvfx_reg dst; 414 415 switch (fdst->Register.File) { 416 case TGSI_FILE_NULL: 417 dst = nvfx_reg(NVFXSR_NONE, 0); 418 break; 419 case TGSI_FILE_OUTPUT: 420 dst = vpc->r_result[fdst->Register.Index]; 421 break; 422 case TGSI_FILE_TEMPORARY: 423 dst = vpc->r_temp[fdst->Register.Index]; 424 break; 425 case TGSI_FILE_ADDRESS: 426 dst = vpc->r_address[fdst->Register.Index]; 427 break; 428 default: 429 NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File); 430 dst.index = 0; 431 dst.type = 0; 432 break; 433 } 434 435 return dst; 436} 437 438static inline int 439tgsi_mask(uint tgsi) 440{ 441 int mask = 0; 442 443 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X; 444 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y; 445 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z; 446 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W; 447 return mask; 448} 449 450static boolean 451nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc, 452 unsigned idx, const struct tgsi_full_instruction *finst) 453{ 454 struct nvfx_src src[3], tmp; 455 struct nvfx_reg dst; 456 struct nvfx_reg final_dst; 457 struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 458 struct nvfx_insn insn; 459 struct nvfx_relocation reloc; 460 struct nvfx_loop_entry loop; 461 boolean sat = FALSE; 462 int mask; 463 int ai = -1, ci = -1, ii = -1; 464 int i; 465 unsigned sub_depth = 0; 466 467 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 468 const struct tgsi_full_src_register *fsrc; 469 470 fsrc = &finst->Src[i]; 471 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { 472 src[i] = tgsi_src(vpc, fsrc); 473 } 474 } 475 476 for (i = 0; i < finst->Instruction.NumSrcRegs; 
i++) { 477 const struct tgsi_full_src_register *fsrc; 478 479 fsrc = &finst->Src[i]; 480 481 switch (fsrc->Register.File) { 482 case TGSI_FILE_INPUT: 483 if (ai == -1 || ai == fsrc->Register.Index) { 484 ai = fsrc->Register.Index; 485 src[i] = tgsi_src(vpc, fsrc); 486 } else { 487 src[i] = nvfx_src(temp(vpc)); 488 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 489 tgsi_src(vpc, fsrc), none, none)); 490 } 491 break; 492 case TGSI_FILE_CONSTANT: 493 if ((ci == -1 && ii == -1) || 494 ci == fsrc->Register.Index) { 495 ci = fsrc->Register.Index; 496 src[i] = tgsi_src(vpc, fsrc); 497 } else { 498 src[i] = nvfx_src(temp(vpc)); 499 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 500 tgsi_src(vpc, fsrc), none, none)); 501 } 502 break; 503 case TGSI_FILE_IMMEDIATE: 504 if ((ci == -1 && ii == -1) || 505 ii == fsrc->Register.Index) { 506 ii = fsrc->Register.Index; 507 src[i] = tgsi_src(vpc, fsrc); 508 } else { 509 src[i] = nvfx_src(temp(vpc)); 510 nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL, 511 tgsi_src(vpc, fsrc), none, none)); 512 } 513 break; 514 case TGSI_FILE_TEMPORARY: 515 /* handled above */ 516 break; 517 default: 518 NOUVEAU_ERR("bad src file\n"); 519 return FALSE; 520 } 521 } 522 523 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 524 if(src[i].reg.type < 0) 525 return FALSE; 526 } 527 528 if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS && 529 finst->Instruction.Opcode != TGSI_OPCODE_ARL) 530 return FALSE; 531 532 final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]); 533 mask = tgsi_mask(finst->Dst[0].Register.WriteMask); 534 if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { 535 assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL); 536 if (nv30->use_nv4x) 537 sat = TRUE; 538 else 539 if(dst.type != NVFXSR_TEMP) 540 dst = temp(vpc); 541 } 542 543 switch (finst->Instruction.Opcode) { 544 case TGSI_OPCODE_ABS: 545 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none)); 546 
break; 547 case TGSI_OPCODE_ADD: 548 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1])); 549 break; 550 case TGSI_OPCODE_ARL: 551 nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none)); 552 break; 553 case TGSI_OPCODE_CEIL: 554 tmp = nvfx_src(temp(vpc)); 555 nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none)); 556 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none)); 557 break; 558 case TGSI_OPCODE_CMP: 559 insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); 560 insn.cc_update = 1; 561 nvfx_vp_emit(vpc, insn); 562 563 insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none); 564 insn.cc_test = NVFX_COND_GE; 565 nvfx_vp_emit(vpc, insn); 566 567 insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none); 568 insn.cc_test = NVFX_COND_LT; 569 nvfx_vp_emit(vpc, insn); 570 break; 571 case TGSI_OPCODE_COS: 572 nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0])); 573 break; 574 case TGSI_OPCODE_DP2: 575 tmp = nvfx_src(temp(vpc)); 576 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none)); 577 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y))); 578 break; 579 case TGSI_OPCODE_DP3: 580 nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none)); 581 break; 582 case TGSI_OPCODE_DP4: 583 nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none)); 584 break; 585 case TGSI_OPCODE_DPH: 586 nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none)); 587 break; 588 case TGSI_OPCODE_DST: 589 nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none)); 590 break; 591 case TGSI_OPCODE_EX2: 592 nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0])); 593 break; 594 case TGSI_OPCODE_EXP: 595 nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0])); 596 break; 597 case TGSI_OPCODE_FLR: 598 
nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none)); 599 break; 600 case TGSI_OPCODE_FRC: 601 nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none)); 602 break; 603 case TGSI_OPCODE_LG2: 604 nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0])); 605 break; 606 case TGSI_OPCODE_LIT: 607 nvfx_vp_emit(vpc, arith(sat, SCA, LIT, dst, mask, none, none, src[0])); 608 break; 609 case TGSI_OPCODE_LOG: 610 nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0])); 611 break; 612 case TGSI_OPCODE_LRP: 613 tmp = nvfx_src(temp(vpc)); 614 nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2])); 615 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp)); 616 break; 617 case TGSI_OPCODE_MAD: 618 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2])); 619 break; 620 case TGSI_OPCODE_MAX: 621 nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none)); 622 break; 623 case TGSI_OPCODE_MIN: 624 nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none)); 625 break; 626 case TGSI_OPCODE_MOV: 627 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none)); 628 break; 629 case TGSI_OPCODE_MUL: 630 nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none)); 631 break; 632 case TGSI_OPCODE_NOP: 633 break; 634 case TGSI_OPCODE_POW: 635 tmp = nvfx_src(temp(vpc)); 636 nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X))); 637 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none)); 638 nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X))); 639 break; 640 case TGSI_OPCODE_RCP: 641 nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0])); 642 break; 643 case TGSI_OPCODE_RSQ: 644 nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0]))); 645 break; 646 
case TGSI_OPCODE_SEQ: 647 nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none)); 648 break; 649 case TGSI_OPCODE_SFL: 650 nvfx_vp_emit(vpc, arith(sat, VEC, SFL, dst, mask, src[0], src[1], none)); 651 break; 652 case TGSI_OPCODE_SGE: 653 nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none)); 654 break; 655 case TGSI_OPCODE_SGT: 656 nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none)); 657 break; 658 case TGSI_OPCODE_SIN: 659 nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0])); 660 break; 661 case TGSI_OPCODE_SLE: 662 nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none)); 663 break; 664 case TGSI_OPCODE_SLT: 665 nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none)); 666 break; 667 case TGSI_OPCODE_SNE: 668 nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none)); 669 break; 670 case TGSI_OPCODE_SSG: 671 nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none)); 672 break; 673 case TGSI_OPCODE_STR: 674 nvfx_vp_emit(vpc, arith(sat, VEC, STR, dst, mask, src[0], src[1], none)); 675 break; 676 case TGSI_OPCODE_SUB: 677 nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1]))); 678 break; 679 case TGSI_OPCODE_TRUNC: 680 tmp = nvfx_src(temp(vpc)); 681 insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); 682 insn.cc_update = 1; 683 nvfx_vp_emit(vpc, insn); 684 685 nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none)); 686 nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none)); 687 688 insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none); 689 insn.cc_test = NVFX_COND_LT; 690 nvfx_vp_emit(vpc, insn); 691 break; 692 case TGSI_OPCODE_XPD: 693 tmp = nvfx_src(temp(vpc)); 694 nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none)); 695 nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), 
swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp))); 696 break; 697 case TGSI_OPCODE_IF: 698 insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none); 699 insn.cc_update = 1; 700 nvfx_vp_emit(vpc, insn); 701 702 reloc.location = vpc->vp->nr_insns; 703 reloc.target = finst->Label.Label + 1; 704 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 705 706 insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); 707 insn.cc_test = NVFX_COND_EQ; 708 insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0; 709 nvfx_vp_emit(vpc, insn); 710 break; 711 case TGSI_OPCODE_ELSE: 712 case TGSI_OPCODE_BRA: 713 case TGSI_OPCODE_CAL: 714 reloc.location = vpc->vp->nr_insns; 715 reloc.target = finst->Label.Label; 716 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 717 718 if(finst->Instruction.Opcode == TGSI_OPCODE_CAL) 719 insn = arith(0, SCA, CAL, none.reg, 0, none, none, none); 720 else 721 insn = arith(0, SCA, BRA, none.reg, 0, none, none, none); 722 nvfx_vp_emit(vpc, insn); 723 break; 724 case TGSI_OPCODE_RET: 725 if(sub_depth || !vpc->vp->enabled_ucps) { 726 tmp = none; 727 tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0; 728 nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp)); 729 } else { 730 reloc.location = vpc->vp->nr_insns; 731 reloc.target = vpc->info->num_instructions; 732 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 733 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 734 } 735 break; 736 case TGSI_OPCODE_BGNSUB: 737 ++sub_depth; 738 break; 739 case TGSI_OPCODE_ENDSUB: 740 --sub_depth; 741 break; 742 case TGSI_OPCODE_ENDIF: 743 /* nothing to do here */ 744 break; 745 case TGSI_OPCODE_BGNLOOP: 746 loop.cont_target = idx; 747 loop.brk_target = finst->Label.Label + 1; 748 util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop); 749 break; 750 case TGSI_OPCODE_ENDLOOP: 751 loop = 
util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry); 752 753 reloc.location = vpc->vp->nr_insns; 754 reloc.target = loop.cont_target; 755 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 756 757 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 758 break; 759 case TGSI_OPCODE_CONT: 760 loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); 761 762 reloc.location = vpc->vp->nr_insns; 763 reloc.target = loop.cont_target; 764 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 765 766 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 767 break; 768 case TGSI_OPCODE_BRK: 769 loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry); 770 771 reloc.location = vpc->vp->nr_insns; 772 reloc.target = loop.brk_target; 773 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 774 775 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 776 break; 777 case TGSI_OPCODE_END: 778 assert(!sub_depth); 779 if(vpc->vp->enabled_ucps) { 780 if(idx != (vpc->info->num_instructions - 1)) { 781 reloc.location = vpc->vp->nr_insns; 782 reloc.target = vpc->info->num_instructions; 783 util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc); 784 nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none)); 785 } 786 } else { 787 if(vpc->vp->nr_insns) 788 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 789 nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none)); 790 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 791 } 792 break; 793 default: 794 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); 795 return FALSE; 796 } 797 798 if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !nv30->use_nv4x) { 799 if (!vpc->r_0_1.type) 800 vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0); 801 nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), 
swz(nvfx_src(vpc->r_0_1), X, X, X, X), none)); 802 nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none)); 803 } 804 805 release_temps(vpc); 806 return TRUE; 807} 808 809static boolean 810nvfx_vertprog_parse_decl_output(struct nv30_context *nv30, struct nvfx_vpc *vpc, 811 const struct tgsi_full_declaration *fdec) 812{ 813 unsigned num_texcoords = nv30->is_nv4x ? 10 : 8; 814 unsigned idx = fdec->Range.First; 815 int hw = 0, i; 816 817 switch (fdec->Semantic.Name) { 818 case TGSI_SEMANTIC_POSITION: 819 hw = NVFX_VP(INST_DEST_POS); 820 vpc->hpos_idx = idx; 821 break; 822 case TGSI_SEMANTIC_CLIPVERTEX: 823 vpc->r_result[idx] = temp(vpc); 824 vpc->r_temps_discard = 0; 825 vpc->cvtx_idx = idx; 826 return TRUE; 827 case TGSI_SEMANTIC_COLOR: 828 if (fdec->Semantic.Index == 0) { 829 hw = NVFX_VP(INST_DEST_COL0); 830 } else 831 if (fdec->Semantic.Index == 1) { 832 hw = NVFX_VP(INST_DEST_COL1); 833 } else { 834 NOUVEAU_ERR("bad colour semantic index\n"); 835 return FALSE; 836 } 837 break; 838 case TGSI_SEMANTIC_BCOLOR: 839 if (fdec->Semantic.Index == 0) { 840 hw = NVFX_VP(INST_DEST_BFC0); 841 } else 842 if (fdec->Semantic.Index == 1) { 843 hw = NVFX_VP(INST_DEST_BFC1); 844 } else { 845 NOUVEAU_ERR("bad bcolour semantic index\n"); 846 return FALSE; 847 } 848 break; 849 case TGSI_SEMANTIC_FOG: 850 hw = NVFX_VP(INST_DEST_FOGC); 851 break; 852 case TGSI_SEMANTIC_PSIZE: 853 hw = NVFX_VP(INST_DEST_PSZ); 854 break; 855 case TGSI_SEMANTIC_GENERIC: 856 for (i = 0; i < num_texcoords; i++) { 857 if (vpc->vp->texcoord[i] == fdec->Semantic.Index) { 858 hw = NVFX_VP(INST_DEST_TC(i)); 859 break; 860 } 861 } 862 863 if (i == num_texcoords) { 864 vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0); 865 return TRUE; 866 } 867 break; 868 case TGSI_SEMANTIC_EDGEFLAG: 869 /* not really an error just a fallback */ 870 NOUVEAU_ERR("cannot handle edgeflag output\n"); 871 return FALSE; 872 default: 873 NOUVEAU_ERR("bad output semantic\n"); 874 
return FALSE; 875 } 876 877 vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw); 878 return TRUE; 879} 880 881static boolean 882nvfx_vertprog_prepare(struct nv30_context *nv30, struct nvfx_vpc *vpc) 883{ 884 struct tgsi_parse_context p; 885 int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i; 886 887 tgsi_parse_init(&p, vpc->pipe.tokens); 888 while (!tgsi_parse_end_of_tokens(&p)) { 889 const union tgsi_full_token *tok = &p.FullToken; 890 891 tgsi_parse_token(&p); 892 switch(tok->Token.Type) { 893 case TGSI_TOKEN_TYPE_IMMEDIATE: 894 nr_imm++; 895 break; 896 case TGSI_TOKEN_TYPE_DECLARATION: 897 { 898 const struct tgsi_full_declaration *fdec; 899 900 fdec = &p.FullToken.FullDeclaration; 901 switch (fdec->Declaration.File) { 902 case TGSI_FILE_TEMPORARY: 903 if (fdec->Range.Last > high_temp) { 904 high_temp = 905 fdec->Range.Last; 906 } 907 break; 908 case TGSI_FILE_ADDRESS: 909 if (fdec->Range.Last > high_addr) { 910 high_addr = 911 fdec->Range.Last; 912 } 913 break; 914 case TGSI_FILE_CONSTANT: 915 if (fdec->Range.Last > high_const) { 916 high_const = 917 fdec->Range.Last; 918 } 919 break; 920 case TGSI_FILE_OUTPUT: 921 if (!nvfx_vertprog_parse_decl_output(nv30, vpc, fdec)) 922 return FALSE; 923 break; 924 default: 925 break; 926 } 927 } 928 break; 929 default: 930 break; 931 } 932 } 933 tgsi_parse_free(&p); 934 935 if (nr_imm) { 936 vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg)); 937 assert(vpc->imm); 938 } 939 940 if (++high_temp) { 941 vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg)); 942 for (i = 0; i < high_temp; i++) 943 vpc->r_temp[i] = temp(vpc); 944 } 945 946 if (++high_addr) { 947 vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg)); 948 for (i = 0; i < high_addr; i++) 949 vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i); 950 } 951 952 if(++high_const) { 953 vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg)); 954 for (i = 0; i < high_const; i++) 955 vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0); 956 } 957 958 
vpc->r_temps_discard = 0; 959 return TRUE; 960} 961 962DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) 963 964boolean 965_nvfx_vertprog_translate(struct nv30_context *nv30, struct nv30_vertprog *vp) 966{ 967 struct tgsi_parse_context parse; 968 struct nvfx_vpc *vpc = NULL; 969 struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); 970 struct util_dynarray insns; 971 int i, ucps; 972 973 vp->translated = FALSE; 974 vp->nr_insns = 0; 975 vp->nr_consts = 0; 976 977 vpc = CALLOC_STRUCT(nvfx_vpc); 978 if (!vpc) 979 return FALSE; 980 vpc->nv30 = nv30; 981 vpc->vp = vp; 982 vpc->pipe = vp->pipe; 983 vpc->info = &vp->info; 984 vpc->cvtx_idx = -1; 985 986 if (!nvfx_vertprog_prepare(nv30, vpc)) { 987 FREE(vpc); 988 return FALSE; 989 } 990 991 /* Redirect post-transform vertex position to a temp if user clip 992 * planes are enabled. We need to append code to the vtxprog 993 * to handle clip planes later. 994 */ 995 if (vp->enabled_ucps && vpc->cvtx_idx < 0) { 996 vpc->r_result[vpc->hpos_idx] = temp(vpc); 997 vpc->r_temps_discard = 0; 998 vpc->cvtx_idx = vpc->hpos_idx; 999 } 1000 1001 util_dynarray_init(&insns); 1002 1003 tgsi_parse_init(&parse, vp->pipe.tokens); 1004 while (!tgsi_parse_end_of_tokens(&parse)) { 1005 tgsi_parse_token(&parse); 1006 1007 switch (parse.FullToken.Token.Type) { 1008 case TGSI_TOKEN_TYPE_IMMEDIATE: 1009 { 1010 const struct tgsi_full_immediate *imm; 1011 1012 imm = &parse.FullToken.FullImmediate; 1013 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 1014 assert(imm->Immediate.NrTokens == 4 + 1); 1015 vpc->imm[vpc->nr_imm++] = 1016 constant(vpc, -1, 1017 imm->u[0].Float, 1018 imm->u[1].Float, 1019 imm->u[2].Float, 1020 imm->u[3].Float); 1021 } 1022 break; 1023 case TGSI_TOKEN_TYPE_INSTRUCTION: 1024 { 1025 const struct tgsi_full_instruction *finst; 1026 unsigned idx = insns.size >> 2; 1027 util_dynarray_append(&insns, unsigned, vp->nr_insns); 1028 finst = &parse.FullToken.FullInstruction; 1029 if 
(!nvfx_vertprog_parse_instruction(nv30, vpc, idx, finst)) 1030 goto out; 1031 } 1032 break; 1033 default: 1034 break; 1035 } 1036 } 1037 1038 util_dynarray_append(&insns, unsigned, vp->nr_insns); 1039 1040 for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation)) 1041 { 1042 struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i); 1043 struct nvfx_relocation hw_reloc; 1044 1045 hw_reloc.location = label_reloc->location; 1046 hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target]; 1047 1048 //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target); 1049 1050 util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc); 1051 } 1052 util_dynarray_fini(&insns); 1053 util_dynarray_trim(&vp->branch_relocs); 1054 1055 /* XXX: what if we add a RET before?! make sure we jump here...*/ 1056 1057 /* Write out HPOS if it was redirected to a temp earlier */ 1058 if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { 1059 struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT, 1060 NVFX_VP(INST_DEST_POS)); 1061 struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); 1062 1063 nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none)); 1064 } 1065 1066 /* Insert code to handle user clip planes */ 1067 ucps = vp->enabled_ucps; 1068 while (ucps) { 1069 int i = ffs(ucps) - 1; ucps &= ~(1 << i); 1070 struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i)); 1071 struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, 512 + i)); 1072 struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->cvtx_idx]); 1073 unsigned mask; 1074 1075 if(nv30->is_nv4x) 1076 { 1077 switch (i) { 1078 case 0: case 3: mask = NVFX_VP_MASK_Y; break; 1079 case 1: case 4: mask = NVFX_VP_MASK_Z; break; 1080 case 2: case 5: mask = NVFX_VP_MASK_W; break; 1081 default: 1082 NOUVEAU_ERR("invalid clip dist #%d\n", i); 1083 goto out; 1084 } 1085 } 1086 
else 1087 mask = NVFX_VP_MASK_X; 1088 1089 nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none)); 1090 } 1091 1092 if (vpc->vp->nr_insns) 1093 vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; 1094 1095 if(debug_get_option_nvfx_dump_vp()) 1096 { 1097 debug_printf("\n"); 1098 tgsi_dump(vpc->pipe.tokens, 0); 1099 1100 debug_printf("\n%s vertex program:\n", nv30->is_nv4x ? "nv4x" : "nv3x"); 1101 for (i = 0; i < vp->nr_insns; i++) 1102 debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); 1103 debug_printf("\n"); 1104 } 1105 1106 vp->translated = TRUE; 1107 1108out: 1109 tgsi_parse_free(&parse); 1110 if(vpc) { 1111 util_dynarray_fini(&vpc->label_relocs); 1112 util_dynarray_fini(&vpc->loop_stack); 1113 FREE(vpc->r_temp); 1114 FREE(vpc->r_address); 1115 FREE(vpc->r_const); 1116 FREE(vpc->imm); 1117 FREE(vpc); 1118 } 1119 1120 return vp->translated; 1121} 1122