// VertexProgram.cpp revision 894018228b0e0bdbd7aa7e8f47d4a9458789ca82
1// SwiftShader Software Renderer 2// 3// Copyright(c) 2005-2011 TransGaming Inc. 4// 5// All rights reserved. No part of this software may be copied, distributed, transmitted, 6// transcribed, stored in a retrieval system, translated into any human or computer 7// language by any means, or disclosed to third parties without the explicit written 8// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express 9// or implied, including but not limited to any patent rights, are granted to you. 10// 11 12#include "VertexProgram.hpp" 13 14#include "Renderer.hpp" 15#include "VertexShader.hpp" 16#include "Vertex.hpp" 17#include "Half.hpp" 18#include "SamplerCore.hpp" 19#include "Debug.hpp" 20 21extern bool localShaderConstants; 22 23namespace sw 24{ 25 VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *vertexShader) : VertexRoutine(state), vertexShader(vertexShader) 26 { 27 returns = false; 28 ifDepth = 0; 29 loopRepDepth = 0; 30 breakDepth = 0; 31 32 for(int i = 0; i < 2048; i++) 33 { 34 labelBlock[i] = 0; 35 } 36 } 37 38 VertexProgram::~VertexProgram() 39 { 40 for(int i = 0; i < 4; i++) 41 { 42 delete sampler[i]; 43 } 44 } 45 46 void VertexProgram::pipeline(Registers &r) 47 { 48 for(int i = 0; i < 4; i++) 49 { 50 sampler[i] = new SamplerCore(r.constants, state.samplerState[i]); 51 } 52 53 if(!state.preTransformed) 54 { 55 shader(r); 56 } 57 else 58 { 59 passThrough(r); 60 } 61 } 62 63 Color4f VertexProgram::readConstant(Registers &r, const Src &src, int offset) 64 { 65 Color4f c; 66 67 int i = src.index + offset; 68 bool relative = src.relative; 69 70 if(!relative) 71 { 72 c.r = c.g = c.b = c.a = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i])); 73 74 c.r = c.r.xxxx; 75 c.g = c.g.yyyy; 76 c.b = c.b.zzzz; 77 c.a = c.a.wwww; 78 79 if(localShaderConstants) // Constant may be known at compile time 80 { 81 for(int j = 0; j < vertexShader->getLength(); j++) 82 { 83 const ShaderInstruction &instruction = 
*vertexShader->getInstruction(j); 84 85 if(instruction.getOpcode() == ShaderOperation::OPCODE_DEF) 86 { 87 if(instruction.getDestinationParameter().index == i) 88 { 89 c.r = Float4(instruction.getSourceParameter(0).value); 90 c.g = Float4(instruction.getSourceParameter(1).value); 91 c.b = Float4(instruction.getSourceParameter(2).value); 92 c.a = Float4(instruction.getSourceParameter(3).value); 93 94 break; 95 } 96 } 97 } 98 } 99 } 100 else if(src.relativeType == Src::PARAMETER_LOOP) 101 { 102 Int loopCounter = r.aL[r.loopDepth]; 103 104 c.r = c.g = c.b = c.a = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16); 105 106 c.r = c.r.xxxx; 107 c.g = c.g.yyyy; 108 c.b = c.b.zzzz; 109 c.a = c.a.wwww; 110 } 111 else 112 { 113 Int index0; 114 Int index1; 115 Int index2; 116 Int index3; 117 118 Float4 a0_; 119 120 switch(src.relativeSwizzle & 0x03) 121 { 122 case 0: a0_ = r.a0.x; break; 123 case 1: a0_ = r.a0.y; break; 124 case 2: a0_ = r.a0.z; break; 125 case 3: a0_ = r.a0.w; break; 126 } 127 128 index0 = i + RoundInt(Float(a0_.x)); 129 index1 = i + RoundInt(Float(a0_.y)); 130 index2 = i + RoundInt(Float(a0_.z)); 131 index3 = i + RoundInt(Float(a0_.w)); 132 133 // Clamp to constant register range, c[256] = {0, 0, 0, 0} 134 index0 = IfThenElse(UInt(index0) > UInt(256), Int(256), index0); 135 index1 = IfThenElse(UInt(index1) > UInt(256), Int(256), index1); 136 index2 = IfThenElse(UInt(index2) > UInt(256), Int(256), index2); 137 index3 = IfThenElse(UInt(index3) > UInt(256), Int(256), index3); 138 139 c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16); 140 c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16); 141 c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16); 142 c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16); 143 144 transpose4x4(c.x, c.y, c.z, c.w); 145 } 146 147 return c; 148 } 149 150 void VertexProgram::shader(Registers &r) 151 { 152 // 
vertexShader->print("VertexShader-%0.16llX.txt", state.shaderHash); 153 154 unsigned short version = vertexShader->getVersion(); 155 156 r.enableIndex = 0; 157 r.stackIndex = 0; 158 159 for(int i = 0; i < vertexShader->getLength(); i++) 160 { 161 const ShaderInstruction *instruction = vertexShader->getInstruction(i); 162 Op::Opcode opcode = instruction->getOpcode(); 163 164 // #ifndef NDEBUG // FIXME: Centralize debug output control 165 // vertexShader->printInstruction(i, "debug.txt"); 166 // #endif 167 168 if(opcode == Op::OPCODE_DCL || opcode == Op::OPCODE_DEF || opcode == Op::OPCODE_DEFI || opcode == Op::OPCODE_DEFB) 169 { 170 continue; 171 } 172 173 Dst dest = instruction->getDestinationParameter(); 174 Src src0 = instruction->getSourceParameter(0); 175 Src src1 = instruction->getSourceParameter(1); 176 Src src2 = instruction->getSourceParameter(2); 177 Src src3 = instruction->getSourceParameter(3); 178 179 bool predicate = instruction->isPredicate(); 180 int size = vertexShader->size(opcode); 181 Usage usage = instruction->getUsage(); 182 unsigned char usageIndex = instruction->getUsageIndex(); 183 Control control = instruction->getControl(); 184 bool integer = dest.type == Dst::PARAMETER_ADDR; 185 bool pp = dest.partialPrecision; 186 187 Color4f d; 188 Color4f s0; 189 Color4f s1; 190 Color4f s2; 191 Color4f s3; 192 193 if(src0.type != Src::PARAMETER_VOID) s0 = reg(r, src0); 194 if(src1.type != Src::PARAMETER_VOID) s1 = reg(r, src1); 195 if(src2.type != Src::PARAMETER_VOID) s2 = reg(r, src2); 196 if(src3.type != Src::PARAMETER_VOID) s3 = reg(r, src3); 197 198 switch(opcode) 199 { 200 case Op::OPCODE_VS_1_0: break; 201 case Op::OPCODE_VS_1_1: break; 202 case Op::OPCODE_VS_2_0: break; 203 case Op::OPCODE_VS_2_x: break; 204 case Op::OPCODE_VS_2_sw: break; 205 case Op::OPCODE_VS_3_0: break; 206 case Op::OPCODE_VS_3_sw: break; 207 case Op::OPCODE_DCL: break; 208 case Op::OPCODE_DEF: break; 209 case Op::OPCODE_DEFI: break; 210 case Op::OPCODE_DEFB: break; 211 case 
Op::OPCODE_NOP: break; 212 case Op::OPCODE_ABS: abs(d, s0); break; 213 case Op::OPCODE_ADD: add(d, s0, s1); break; 214 case Op::OPCODE_CRS: crs(d, s0, s1); break; 215 case Op::OPCODE_DP3: dp3(d, s0, s1); break; 216 case Op::OPCODE_DP4: dp4(d, s0, s1); break; 217 case Op::OPCODE_DST: dst(d, s0, s1); break; 218 case Op::OPCODE_EXP: exp(d, s0, pp); break; 219 case Op::OPCODE_EXPP: expp(d, s0, version); break; 220 case Op::OPCODE_FRC: frc(d, s0); break; 221 case Op::OPCODE_LIT: lit(d, s0); break; 222 case Op::OPCODE_LOG: log(d, s0, pp); break; 223 case Op::OPCODE_LOGP: logp(d, s0, version); break; 224 case Op::OPCODE_LRP: lrp(d, s0, s1, s2); break; 225 case Op::OPCODE_M3X2: M3X2(r, d, s0, src1); break; 226 case Op::OPCODE_M3X3: M3X3(r, d, s0, src1); break; 227 case Op::OPCODE_M3X4: M3X4(r, d, s0, src1); break; 228 case Op::OPCODE_M4X3: M4X3(r, d, s0, src1); break; 229 case Op::OPCODE_M4X4: M4X4(r, d, s0, src1); break; 230 case Op::OPCODE_MAD: mad(d, s0, s1, s2); break; 231 case Op::OPCODE_MAX: max(d, s0, s1); break; 232 case Op::OPCODE_MIN: min(d, s0, s1); break; 233 case Op::OPCODE_MOV: mov(d, s0, integer); break; 234 case Op::OPCODE_MOVA: mov(d, s0); break; 235 case Op::OPCODE_MUL: mul(d, s0, s1); break; 236 case Op::OPCODE_NRM: nrm(d, s0, pp); break; 237 case Op::OPCODE_POW: pow(d, s0, s1, pp); break; 238 case Op::OPCODE_RCP: rcp(d, s0, pp); break; 239 case Op::OPCODE_RSQ: rsq(d, s0, pp); break; 240 case Op::OPCODE_SGE: sge(d, s0, s1); break; 241 case Op::OPCODE_SGN: sgn(d, s0); break; 242 case Op::OPCODE_SINCOS: sincos(d, s0, pp); break; 243 case Op::OPCODE_SLT: slt(d, s0, s1); break; 244 case Op::OPCODE_SUB: sub(d, s0, s1); break; 245 case Op::OPCODE_BREAK: BREAK(r); break; 246 case Op::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break; 247 case Op::OPCODE_BREAKP: BREAKP(r, src0); break; 248 case Op::OPCODE_CALL: CALL(r, dest.index); break; 249 case Op::OPCODE_CALLNZ: CALLNZ(r, dest.index, src0); break; 250 case Op::OPCODE_ELSE: ELSE(r); break; 251 case 
Op::OPCODE_ENDIF: ENDIF(r); break; 252 case Op::OPCODE_ENDLOOP: ENDLOOP(r); break; 253 case Op::OPCODE_ENDREP: ENDREP(r); break; 254 case Op::OPCODE_IF: IF(r, src0); break; 255 case Op::OPCODE_IFC: IFC(r, s0, s1, control); break; 256 case Op::OPCODE_LABEL: LABEL(dest.index); break; 257 case Op::OPCODE_LOOP: LOOP(r, src1); break; 258 case Op::OPCODE_REP: REP(r, src0); break; 259 case Op::OPCODE_RET: RET(r); break; 260 case Op::OPCODE_SETP: setp(d, s0, s1, control); break; 261 case Op::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1); break; 262 case Op::OPCODE_END: break; 263 default: 264 ASSERT(false); 265 } 266 267 if(dest.type != Dst::PARAMETER_VOID && dest.type != Dst::PARAMETER_LABEL) 268 { 269 if(dest.saturate) 270 { 271 if(dest.x) d.r = Max(d.r, Float4(0.0f, 0.0f, 0.0f, 0.0f)); 272 if(dest.y) d.g = Max(d.g, Float4(0.0f, 0.0f, 0.0f, 0.0f)); 273 if(dest.z) d.b = Max(d.b, Float4(0.0f, 0.0f, 0.0f, 0.0f)); 274 if(dest.w) d.a = Max(d.a, Float4(0.0f, 0.0f, 0.0f, 0.0f)); 275 276 if(dest.x) d.r = Min(d.r, Float4(1.0f, 1.0f, 1.0f, 1.0f)); 277 if(dest.y) d.g = Min(d.g, Float4(1.0f, 1.0f, 1.0f, 1.0f)); 278 if(dest.z) d.b = Min(d.b, Float4(1.0f, 1.0f, 1.0f, 1.0f)); 279 if(dest.w) d.a = Min(d.a, Float4(1.0f, 1.0f, 1.0f, 1.0f)); 280 } 281 282 if(vertexShader->containsDynamicBranching()) 283 { 284 Color4f pDst; // FIXME: Rename 285 286 switch(dest.type) 287 { 288 case Dst::PARAMETER_VOID: break; 289 case Dst::PARAMETER_TEMP: pDst = r.r[dest.index]; break; 290 case Dst::PARAMETER_ADDR: pDst = r.a0; break; 291 case Dst::PARAMETER_RASTOUT: 292 switch(dest.index) 293 { 294 case 0: 295 if(dest.x) pDst.x = r.ox[Pos]; 296 if(dest.y) pDst.y = r.oy[Pos]; 297 if(dest.z) pDst.z = r.oz[Pos]; 298 if(dest.w) pDst.w = r.ow[Pos]; 299 break; 300 case 1: 301 pDst.x = r.ox[Fog]; 302 break; 303 case 2: 304 pDst.x = r.oy[Pts]; 305 break; 306 default: 307 ASSERT(false); 308 } 309 break; 310 case Dst::PARAMETER_ATTROUT: 311 if(dest.x) pDst.x = r.ox[D0 + dest.index]; 312 if(dest.y) pDst.y = r.oy[D0 + 
dest.index]; 313 if(dest.z) pDst.z = r.oz[D0 + dest.index]; 314 if(dest.w) pDst.w = r.ow[D0 + dest.index]; 315 break; 316 case Dst::PARAMETER_TEXCRDOUT: 317 // case Dst::PARAMETER_OUTPUT: 318 if(version < 0x0300) 319 { 320 if(dest.x) pDst.x = r.ox[T0 + dest.index]; 321 if(dest.y) pDst.y = r.oy[T0 + dest.index]; 322 if(dest.z) pDst.z = r.oz[T0 + dest.index]; 323 if(dest.w) pDst.w = r.ow[T0 + dest.index]; 324 } 325 else 326 { 327 if(!dest.relative) 328 { 329 if(dest.x) pDst.x = r.ox[dest.index]; 330 if(dest.y) pDst.y = r.oy[dest.index]; 331 if(dest.z) pDst.z = r.oz[dest.index]; 332 if(dest.w) pDst.w = r.ow[dest.index]; 333 } 334 else 335 { 336 Int aL = r.aL[r.loopDepth]; 337 338 if(dest.x) pDst.x = r.ox[dest.index + aL]; 339 if(dest.y) pDst.y = r.oy[dest.index + aL]; 340 if(dest.z) pDst.z = r.oz[dest.index + aL]; 341 if(dest.w) pDst.w = r.ow[dest.index + aL]; 342 } 343 } 344 break; 345 case Dst::PARAMETER_LABEL: break; 346 case Dst::PARAMETER_PREDICATE: pDst = r.p0; break; 347 case Dst::PARAMETER_INPUT: break; 348 default: 349 ASSERT(false); 350 } 351 352 Int4 enable = r.enableStack[r.enableIndex] & r.enableBreak; 353 354 Int4 xEnable = enable; 355 Int4 yEnable = enable; 356 Int4 zEnable = enable; 357 Int4 wEnable = enable; 358 359 if(predicate) 360 { 361 unsigned char pSwizzle = instruction->getPredicateSwizzle(); 362 363 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03]; 364 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03]; 365 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03]; 366 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03]; 367 368 if(!instruction->isPredicateNot()) 369 { 370 if(dest.x) xEnable = xEnable & As<Int4>(xPredicate); 371 if(dest.y) yEnable = yEnable & As<Int4>(yPredicate); 372 if(dest.z) zEnable = zEnable & As<Int4>(zPredicate); 373 if(dest.w) wEnable = wEnable & As<Int4>(wPredicate); 374 } 375 else 376 { 377 if(dest.x) xEnable = xEnable & ~As<Int4>(xPredicate); 378 if(dest.y) yEnable = yEnable & ~As<Int4>(yPredicate); 379 if(dest.z) zEnable = 
zEnable & ~As<Int4>(zPredicate); 380 if(dest.w) wEnable = wEnable & ~As<Int4>(wPredicate); 381 } 382 } 383 384 if(dest.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); 385 if(dest.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); 386 if(dest.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); 387 if(dest.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); 388 389 if(dest.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); 390 if(dest.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); 391 if(dest.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); 392 if(dest.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); 393 } 394 395 switch(dest.type) 396 { 397 case Dst::PARAMETER_VOID: 398 break; 399 case Dst::PARAMETER_TEMP: 400 if(dest.x) r.r[dest.index].x = d.x; 401 if(dest.y) r.r[dest.index].y = d.y; 402 if(dest.z) r.r[dest.index].z = d.z; 403 if(dest.w) r.r[dest.index].w = d.w; 404 break; 405 case Dst::PARAMETER_ADDR: 406 if(dest.x) r.a0.x = d.x; 407 if(dest.y) r.a0.y = d.y; 408 if(dest.z) r.a0.z = d.z; 409 if(dest.w) r.a0.w = d.w; 410 break; 411 case Dst::PARAMETER_RASTOUT: 412 switch(dest.index) 413 { 414 case 0: 415 if(dest.x) r.ox[Pos] = d.x; 416 if(dest.y) r.oy[Pos] = d.y; 417 if(dest.z) r.oz[Pos] = d.z; 418 if(dest.w) r.ow[Pos] = d.w; 419 break; 420 case 1: 421 r.ox[Fog] = d.x; 422 break; 423 case 2: 424 r.oy[Pts] = d.x; 425 break; 426 default: ASSERT(false); 427 } 428 break; 429 case Dst::PARAMETER_ATTROUT: 430 if(dest.x) r.ox[D0 + dest.index] = d.x; 431 if(dest.y) r.oy[D0 + dest.index] = d.y; 432 if(dest.z) r.oz[D0 + dest.index] = d.z; 433 if(dest.w) r.ow[D0 + dest.index] = d.w; 434 break; 435 case Dst::PARAMETER_TEXCRDOUT: 436 // case Dst::PARAMETER_OUTPUT: 437 if(version < 0x0300) 438 { 439 if(dest.x) r.ox[T0 + dest.index] = d.x; 440 if(dest.y) r.oy[T0 + dest.index] = d.y; 441 if(dest.z) r.oz[T0 + dest.index] = d.z; 442 if(dest.w) r.ow[T0 + dest.index] = d.w; 443 } 444 else 445 { 446 if(!dest.relative) 447 { 448 
if(dest.x) r.ox[dest.index] = d.x; 449 if(dest.y) r.oy[dest.index] = d.y; 450 if(dest.z) r.oz[dest.index] = d.z; 451 if(dest.w) r.ow[dest.index] = d.w; 452 } 453 else 454 { 455 Int aL = r.aL[r.loopDepth]; 456 457 if(dest.x) r.ox[dest.index + aL] = d.x; 458 if(dest.y) r.oy[dest.index + aL] = d.y; 459 if(dest.z) r.oz[dest.index + aL] = d.z; 460 if(dest.w) r.ow[dest.index + aL] = d.w; 461 } 462 } 463 break; 464 case Dst::PARAMETER_LABEL: break; 465 case Dst::PARAMETER_PREDICATE: r.p0 = d; break; 466 case Dst::PARAMETER_INPUT: break; 467 default: 468 ASSERT(false); 469 } 470 } 471 } 472 473 if(returns) 474 { 475 Nucleus::setInsertBlock(returnBlock); 476 } 477 } 478 479 void VertexProgram::passThrough(Registers &r) 480 { 481 if(vertexShader) 482 { 483 for(int i = 0; i < 12; i++) 484 { 485 unsigned char usage = vertexShader->output[i][0].usage; 486 unsigned char index = vertexShader->output[i][0].index; 487 488 switch(usage) 489 { 490 case 0xFF: 491 continue; 492 case ShaderOperation::USAGE_PSIZE: 493 r.oy[i] = r.v[i].x; 494 break; 495 case ShaderOperation::USAGE_TEXCOORD: 496 r.ox[i] = r.v[i].x; 497 r.oy[i] = r.v[i].y; 498 r.oz[i] = r.v[i].z; 499 r.ow[i] = r.v[i].w; 500 break; 501 case ShaderOperation::USAGE_POSITION: 502 r.ox[i] = r.v[i].x; 503 r.oy[i] = r.v[i].y; 504 r.oz[i] = r.v[i].z; 505 r.ow[i] = r.v[i].w; 506 break; 507 case ShaderOperation::USAGE_COLOR: 508 r.ox[i] = r.v[i].x; 509 r.oy[i] = r.v[i].y; 510 r.oz[i] = r.v[i].z; 511 r.ow[i] = r.v[i].w; 512 break; 513 case ShaderOperation::USAGE_FOG: 514 r.ox[i] = r.v[i].x; 515 break; 516 default: 517 ASSERT(false); 518 } 519 } 520 } 521 else 522 { 523 r.ox[Pos] = r.v[PositionT].x; 524 r.oy[Pos] = r.v[PositionT].y; 525 r.oz[Pos] = r.v[PositionT].z; 526 r.ow[Pos] = r.v[PositionT].w; 527 528 for(int i = 0; i < 2; i++) 529 { 530 r.ox[D0 + i] = r.v[Color0 + i].x; 531 r.oy[D0 + i] = r.v[Color0 + i].y; 532 r.oz[D0 + i] = r.v[Color0 + i].z; 533 r.ow[D0 + i] = r.v[Color0 + i].w; 534 } 535 536 for(int i = 0; i < 8; i++) 537 { 
538 r.ox[T0 + i] = r.v[TexCoord0 + i].x; 539 r.oy[T0 + i] = r.v[TexCoord0 + i].y; 540 r.oz[T0 + i] = r.v[TexCoord0 + i].z; 541 r.ow[T0 + i] = r.v[TexCoord0 + i].w; 542 } 543 544 r.oy[Pts] = r.v[PSize].x; 545 } 546 } 547 548 Color4f VertexProgram::reg(Registers &r, const Src &src, int offset) 549 { 550 int i = src.index + offset; 551 552 Color4f reg; 553 554 if(src.type == Src::PARAMETER_CONST) 555 { 556 reg = readConstant(r, src, offset); 557 } 558 559 switch(src.type) 560 { 561 case Src::PARAMETER_TEMP: reg = r.r[i]; break; 562 case Src::PARAMETER_CONST: break; 563 case Src::PARAMETER_INPUT: reg = r.v[i]; break; 564 case Src::PARAMETER_VOID: return r.r[0]; // Dummy 565 case Src::PARAMETER_FLOATLITERAL: return r.r[0]; // Dummy 566 case Src::PARAMETER_ADDR: reg = r.a0; break; 567 case Src::PARAMETER_CONSTBOOL: return r.r[0]; // Dummy 568 case Src::PARAMETER_CONSTINT: return r.r[0]; // Dummy 569 case Src::PARAMETER_LOOP: return r.r[0]; // Dummy 570 case Src::PARAMETER_PREDICATE: return r.r[0]; // Dummy 571 case Src::PARAMETER_SAMPLER: return r.r[0]; // Dummy 572 default: 573 ASSERT(false); 574 } 575 576 Color4f mod; 577 578 mod.x = reg[(src.swizzle >> 0) & 0x03]; 579 mod.y = reg[(src.swizzle >> 2) & 0x03]; 580 mod.z = reg[(src.swizzle >> 4) & 0x03]; 581 mod.w = reg[(src.swizzle >> 6) & 0x03]; 582 583 switch(src.modifier) 584 { 585 case Src::MODIFIER_NONE: 586 break; 587 case Src::MODIFIER_NEGATE: 588 mod.x = -mod.x; 589 mod.y = -mod.y; 590 mod.z = -mod.z; 591 mod.w = -mod.w; 592 break; 593 case Src::MODIFIER_BIAS: 594 ASSERT(false); // NOTE: Unimplemented 595 break; 596 case Src::MODIFIER_BIAS_NEGATE: 597 ASSERT(false); // NOTE: Unimplemented 598 break; 599 case Src::MODIFIER_SIGN: 600 ASSERT(false); // NOTE: Unimplemented 601 break; 602 case Src::MODIFIER_SIGN_NEGATE: 603 ASSERT(false); // NOTE: Unimplemented 604 break; 605 case Src::MODIFIER_COMPLEMENT: 606 ASSERT(false); // NOTE: Unimplemented 607 break; 608 case Src::MODIFIER_X2: 609 ASSERT(false); // NOTE: 
Unimplemented 610 break; 611 case Src::MODIFIER_X2_NEGATE: 612 ASSERT(false); // NOTE: Unimplemented 613 break; 614 case Src::MODIFIER_DZ: 615 ASSERT(false); // NOTE: Unimplemented 616 break; 617 case Src::MODIFIER_DW: 618 ASSERT(false); // NOTE: Unimplemented 619 break; 620 case Src::MODIFIER_ABS: 621 mod.x = Abs(mod.x); 622 mod.y = Abs(mod.y); 623 mod.z = Abs(mod.z); 624 mod.w = Abs(mod.w); 625 break; 626 case Src::MODIFIER_ABS_NEGATE: 627 mod.x = -Abs(mod.x); 628 mod.y = -Abs(mod.y); 629 mod.z = -Abs(mod.z); 630 mod.w = -Abs(mod.w); 631 break; 632 case Src::MODIFIER_NOT: 633 UNIMPLEMENTED(); 634 break; 635 default: 636 ASSERT(false); 637 } 638 639 return mod; 640 } 641 642 void VertexProgram::M3X2(Registers &r, Color4f &dst, Color4f &src0, Src &src1) 643 { 644 Color4f row0 = reg(r, src1, 0); 645 Color4f row1 = reg(r, src1, 1); 646 647 dst.x = dot3(src0, row0); 648 dst.y = dot3(src0, row1); 649 } 650 651 void VertexProgram::M3X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1) 652 { 653 Color4f row0 = reg(r, src1, 0); 654 Color4f row1 = reg(r, src1, 1); 655 Color4f row2 = reg(r, src1, 2); 656 657 dst.x = dot3(src0, row0); 658 dst.y = dot3(src0, row1); 659 dst.z = dot3(src0, row2); 660 } 661 662 void VertexProgram::M3X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1) 663 { 664 Color4f row0 = reg(r, src1, 0); 665 Color4f row1 = reg(r, src1, 1); 666 Color4f row2 = reg(r, src1, 2); 667 Color4f row3 = reg(r, src1, 3); 668 669 dst.x = dot3(src0, row0); 670 dst.y = dot3(src0, row1); 671 dst.z = dot3(src0, row2); 672 dst.w = dot3(src0, row3); 673 } 674 675 void VertexProgram::M4X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1) 676 { 677 Color4f row0 = reg(r, src1, 0); 678 Color4f row1 = reg(r, src1, 1); 679 Color4f row2 = reg(r, src1, 2); 680 681 dst.x = dot4(src0, row0); 682 dst.y = dot4(src0, row1); 683 dst.z = dot4(src0, row2); 684 } 685 686 void VertexProgram::M4X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1) 687 { 688 Color4f row0 = reg(r, 
src1, 0); 689 Color4f row1 = reg(r, src1, 1); 690 Color4f row2 = reg(r, src1, 2); 691 Color4f row3 = reg(r, src1, 3); 692 693 dst.x = dot4(src0, row0); 694 dst.y = dot4(src0, row1); 695 dst.z = dot4(src0, row2); 696 dst.w = dot4(src0, row3); 697 } 698 699 void VertexProgram::BREAK(Registers &r) 700 { 701 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock(); 702 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; 703 704 if(breakDepth == 0) 705 { 706 Nucleus::createBr(endBlock); 707 } 708 else 709 { 710 r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex]; 711 Bool allBreak = SignMask(r.enableBreak) == 0x0; 712 713 branch(allBreak, endBlock, deadBlock); 714 } 715 716 Nucleus::setInsertBlock(deadBlock); 717 } 718 719 void VertexProgram::BREAKC(Registers &r, Color4f &src0, Color4f &src1, Control control) 720 { 721 Int4 condition; 722 723 switch(control) 724 { 725 case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 726 case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 727 case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 728 case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 729 case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 730 case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 731 default: 732 ASSERT(false); 733 } 734 735 condition &= r.enableStack[r.enableIndex]; 736 737 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); 738 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; 739 740 r.enableBreak = r.enableBreak & ~condition; 741 Bool allBreak = SignMask(r.enableBreak) == 0x0; 742 743 branch(allBreak, endBlock, continueBlock); 744 Nucleus::setInsertBlock(continueBlock); 745 } 746 747 void VertexProgram::BREAKP(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC 748 { 749 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); 750 751 if(predicateRegister.modifier == Src::MODIFIER_NOT) 752 { 
753 condition = ~condition; 754 } 755 756 condition &= r.enableStack[r.enableIndex]; 757 758 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); 759 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; 760 761 r.enableBreak = r.enableBreak & ~condition; 762 Bool allBreak = SignMask(r.enableBreak) == 0x0; 763 764 branch(allBreak, endBlock, continueBlock); 765 Nucleus::setInsertBlock(continueBlock); 766 } 767 768 void VertexProgram::CALL(Registers &r, int labelIndex) 769 { 770 if(!labelBlock[labelIndex]) 771 { 772 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 773 } 774 775 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); 776 callRetBlock.push_back(retBlock); 777 778 r.callStack[r.stackIndex++] = UInt((unsigned int)callRetBlock.size() - 1); // FIXME 779 780 Nucleus::createBr(labelBlock[labelIndex]); 781 Nucleus::setInsertBlock(retBlock); 782 } 783 784 void VertexProgram::CALLNZ(Registers &r, int labelIndex, const Src &src) 785 { 786 if(src.type == Src::PARAMETER_CONSTBOOL) 787 { 788 CALLNZb(r, labelIndex, src); 789 } 790 else if(src.type == Src::PARAMETER_PREDICATE) 791 { 792 CALLNZp(r, labelIndex, src); 793 } 794 else ASSERT(false); 795 } 796 797 void VertexProgram::CALLNZb(Registers &r, int labelIndex, const Src &boolRegister) 798 { 799 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME 800 801 if(boolRegister.modifier == Src::MODIFIER_NOT) 802 { 803 condition = !condition; 804 } 805 806 if(!labelBlock[labelIndex]) 807 { 808 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 809 } 810 811 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); 812 callRetBlock.push_back(retBlock); 813 814 r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME 815 816 branch(condition, labelBlock[labelIndex], retBlock); 817 Nucleus::setInsertBlock(retBlock); 818 } 819 820 void VertexProgram::CALLNZp(Registers &r, int labelIndex, const Src &predicateRegister) 821 { 
822 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); 823 824 if(predicateRegister.modifier == Src::MODIFIER_NOT) 825 { 826 condition = ~condition; 827 } 828 829 condition &= r.enableStack[r.enableIndex]; 830 831 if(!labelBlock[labelIndex]) 832 { 833 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 834 } 835 836 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock(); 837 callRetBlock.push_back(retBlock); 838 839 r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME 840 841 r.enableIndex++; 842 r.enableStack[r.enableIndex] = condition; 843 844 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; 845 846 branch(notAllFalse, labelBlock[labelIndex], retBlock); 847 Nucleus::setInsertBlock(retBlock); 848 849 r.enableIndex--; 850 } 851 852 void VertexProgram::ELSE(Registers &r) 853 { 854 ifDepth--; 855 856 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth]; 857 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 858 859 if(isConditionalIf[ifDepth]) 860 { 861 Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1]; 862 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; 863 864 branch(notAllFalse, falseBlock, endBlock); 865 866 r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1]; 867 } 868 else 869 { 870 Nucleus::createBr(endBlock); 871 Nucleus::setInsertBlock(falseBlock); 872 } 873 874 ifFalseBlock[ifDepth] = endBlock; 875 876 ifDepth++; 877 } 878 879 void VertexProgram::ENDIF(Registers &r) 880 { 881 ifDepth--; 882 883 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth]; 884 885 Nucleus::createBr(endBlock); 886 Nucleus::setInsertBlock(endBlock); 887 888 if(isConditionalIf[ifDepth]) 889 { 890 breakDepth--; 891 r.enableIndex--; 892 } 893 } 894 895 void VertexProgram::ENDREP(Registers &r) 896 { 897 loopRepDepth--; 898 899 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 900 llvm::BasicBlock *endBlock = 
loopRepEndBlock[loopRepDepth]; 901 902 Nucleus::createBr(testBlock); 903 Nucleus::setInsertBlock(endBlock); 904 905 r.loopDepth--; 906 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 907 } 908 909 void VertexProgram::ENDLOOP(Registers &r) 910 { 911 loopRepDepth--; 912 913 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth]; // FIXME: += 914 915 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 916 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 917 918 Nucleus::createBr(testBlock); 919 Nucleus::setInsertBlock(endBlock); 920 921 r.loopDepth--; 922 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 923 } 924 925 void VertexProgram::IF(Registers &r, const Src &src) 926 { 927 if(src.type == Src::PARAMETER_CONSTBOOL) 928 { 929 IFb(r, src); 930 } 931 else if(src.type == Src::PARAMETER_PREDICATE) 932 { 933 IFp(r, src); 934 } 935 else ASSERT(false); 936 } 937 938 void VertexProgram::IFb(Registers &r, const Src &boolRegister) 939 { 940 ASSERT(ifDepth < 24 + 4); 941 942 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME 943 944 if(boolRegister.modifier == Src::MODIFIER_NOT) 945 { 946 condition = !condition; 947 } 948 949 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); 950 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); 951 952 branch(condition, trueBlock, falseBlock); 953 954 isConditionalIf[ifDepth] = false; 955 ifFalseBlock[ifDepth] = falseBlock; 956 957 ifDepth++; 958 } 959 960 void VertexProgram::IFp(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with IFC 961 { 962 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]); 963 964 if(predicateRegister.modifier == Src::MODIFIER_NOT) 965 { 966 condition = ~condition; 967 } 968 969 condition &= r.enableStack[r.enableIndex]; 970 971 r.enableIndex++; 972 r.enableStack[r.enableIndex] = condition; 973 974 llvm::BasicBlock *trueBlock = 
Nucleus::createBasicBlock(); 975 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); 976 977 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; 978 979 branch(notAllFalse, trueBlock, falseBlock); 980 981 isConditionalIf[ifDepth] = true; 982 ifFalseBlock[ifDepth] = falseBlock; 983 984 ifDepth++; 985 breakDepth++; 986 } 987 988 void VertexProgram::IFC(Registers &r, Color4f &src0, Color4f &src1, Control control) 989 { 990 Int4 condition; 991 992 switch(control) 993 { 994 case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 995 case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 996 case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 997 case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 998 case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 999 case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1000 default: 1001 ASSERT(false); 1002 } 1003 1004 condition &= r.enableStack[r.enableIndex]; 1005 1006 r.enableIndex++; 1007 r.enableStack[r.enableIndex] = condition; 1008 1009 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1010 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1011 1012 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0; 1013 1014 branch(notAllFalse, trueBlock, falseBlock); 1015 1016 isConditionalIf[ifDepth] = true; 1017 ifFalseBlock[ifDepth] = falseBlock; 1018 1019 ifDepth++; 1020 breakDepth++; 1021 } 1022 1023 void VertexProgram::LABEL(int labelIndex) 1024 { 1025 Nucleus::setInsertBlock(labelBlock[labelIndex]); 1026 } 1027 1028 void VertexProgram::LOOP(Registers &r, const Src &integerRegister) 1029 { 1030 r.loopDepth++; 1031 1032 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0])); 1033 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1])); 1034 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2])); 1035 1036 // 
FIXME: Compiles to two instructions? 1037 If(r.increment[r.loopDepth] == 0) 1038 { 1039 r.increment[r.loopDepth] = 1; 1040 } 1041 1042 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1043 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); 1044 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 1045 1046 loopRepTestBlock[loopRepDepth] = testBlock; 1047 loopRepEndBlock[loopRepDepth] = endBlock; 1048 1049 // FIXME: jump(testBlock) 1050 Nucleus::createBr(testBlock); 1051 Nucleus::setInsertBlock(testBlock); 1052 1053 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock); 1054 Nucleus::setInsertBlock(loopBlock); 1055 1056 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: -- 1057 1058 loopRepDepth++; 1059 breakDepth = 0; 1060 } 1061 1062 void VertexProgram::REP(Registers &r, const Src &integerRegister) 1063 { 1064 r.loopDepth++; 1065 1066 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0])); 1067 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1]; 1068 1069 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1070 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); 1071 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock(); 1072 1073 loopRepTestBlock[loopRepDepth] = testBlock; 1074 loopRepEndBlock[loopRepDepth] = endBlock; 1075 1076 // FIXME: jump(testBlock) 1077 Nucleus::createBr(testBlock); 1078 Nucleus::setInsertBlock(testBlock); 1079 1080 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock); 1081 Nucleus::setInsertBlock(loopBlock); 1082 1083 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: -- 1084 1085 loopRepDepth++; 1086 breakDepth = 0; 1087 } 1088 1089 void VertexProgram::RET(Registers &r) 1090 { 1091 if(!returns) 1092 { 1093 returnBlock = Nucleus::createBasicBlock(); 1094 Nucleus::createBr(returnBlock); 1095 1096 returns = true; 1097 } 1098 else 1099 { 1100 // FIXME: Encapsulate 1101 UInt index = r.callStack[--r.stackIndex]; 1102 1103 
llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); 1104 llvm::Value *value = Nucleus::createLoad(index.address); 1105 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock.size()); 1106 1107 for(unsigned int i = 0; i < callRetBlock.size(); i++) 1108 { 1109 Nucleus::addSwitchCase(switchInst, i, callRetBlock[i]); 1110 } 1111 1112 Nucleus::setInsertBlock(unreachableBlock); 1113 Nucleus::createUnreachable(); 1114 } 1115 } 1116 1117 void VertexProgram::TEXLDL(Registers &r, Color4f &dst, Color4f &src0, const Src &src1) 1118 { 1119 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + src1.index * sizeof(Texture); 1120 1121 Color4f tmp; 1122 1123 sampler[src1.index]->sampleTexture(texture, tmp, src0.x, src0.y, src0.z, src0.w, src0, src0, false, false, true); 1124 1125 dst.x = tmp[(src1.swizzle >> 0) & 0x3]; 1126 dst.y = tmp[(src1.swizzle >> 2) & 0x3]; 1127 dst.z = tmp[(src1.swizzle >> 4) & 0x3]; 1128 dst.w = tmp[(src1.swizzle >> 6) & 0x3]; 1129 } 1130} 1131