LLVMReactor.cpp revision b98fe5cd1eaa821083d816cf86a20eefe22f57c7
1// Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include "Nucleus.hpp" 16 17#include "llvm/Support/IRBuilder.h" 18#include "llvm/Function.h" 19#include "llvm/GlobalVariable.h" 20#include "llvm/Module.h" 21#include "llvm/LLVMContext.h" 22#include "llvm/Constants.h" 23#include "llvm/Intrinsics.h" 24#include "llvm/PassManager.h" 25#include "llvm/Analysis/LoopPass.h" 26#include "llvm/Transforms/Scalar.h" 27#include "llvm/Target/TargetData.h" 28#include "llvm/Target/TargetOptions.h" 29#include "llvm/Support/TargetSelect.h" 30#include "../lib/ExecutionEngine/JIT/JIT.h" 31 32#include "LLVMRoutine.hpp" 33#include "LLVMRoutineManager.hpp" 34#include "x86.hpp" 35#include "CPUID.hpp" 36#include "Thread.hpp" 37#include "Memory.hpp" 38#include "MutexLock.hpp" 39 40#include <xmmintrin.h> 41#include <fstream> 42 43#if defined(__x86_64__) && defined(_WIN32) 44extern "C" void X86CompilationCallback() 45{ 46 assert(false); // UNIMPLEMENTED 47} 48#endif 49 50extern "C" 51{ 52 bool (*CodeAnalystInitialize)() = 0; 53 void (*CodeAnalystCompleteJITLog)() = 0; 54 bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0; 55} 56 57namespace llvm 58{ 59 extern bool JITEmitDebugInfo; 60} 61 62namespace 63{ 64 sw::LLVMRoutineManager *routineManager = nullptr; 65 llvm::ExecutionEngine *executionEngine = nullptr; 66 llvm::IRBuilder<> *builder = nullptr; 
67 llvm::LLVMContext *context = nullptr; 68 llvm::Module *module = nullptr; 69 llvm::Function *function = nullptr; 70 71 sw::BackoffLock codegenMutex; 72 73 sw::BasicBlock *falseBB = nullptr; 74} 75 76namespace sw 77{ 78 using namespace llvm; 79 80 Optimization optimization[10] = {InstructionCombining, Disabled}; 81 82 class Type : public llvm::Type {}; 83 class Value : public llvm::Value {}; 84 class SwitchCases : public llvm::SwitchInst {}; 85 class BasicBlock : public llvm::BasicBlock {}; 86 87 inline Type *T(llvm::Type *t) 88 { 89 return reinterpret_cast<Type*>(t); 90 } 91 92 inline Value *V(llvm::Value *t) 93 { 94 return reinterpret_cast<Value*>(t); 95 } 96 97 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t) 98 { 99 return reinterpret_cast<std::vector<llvm::Type*>&>(t); 100 } 101 102 inline BasicBlock *B(llvm::BasicBlock *t) 103 { 104 return reinterpret_cast<BasicBlock*>(t); 105 } 106 107 Nucleus::Nucleus() 108 { 109 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe 110 111 InitializeNativeTarget(); 112 JITEmitDebugInfo = false; 113 114 if(!::context) 115 { 116 ::context = new LLVMContext(); 117 } 118 119 ::module = new Module("", *::context); 120 ::routineManager = new LLVMRoutineManager(); 121 122 #if defined(__x86_64__) 123 const char *architecture = "x86-64"; 124 #else 125 const char *architecture = "x86"; 126 #endif 127 128 SmallVector<std::string, 1> MAttrs; 129 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx"); 130 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov"); 131 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse"); 132 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2"); 133 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3"); 134 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3"); 135 MAttrs.push_back(CPUID::supportsSSE4_1() ? 
"+sse41" : "-sse41"); 136 137 std::string error; 138 TargetMachine *targetMachine = EngineBuilder::selectTarget(::module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error); 139 ::executionEngine = JIT::createJIT(::module, 0, ::routineManager, CodeGenOpt::Aggressive, true, targetMachine); 140 141 if(!::builder) 142 { 143 ::builder = new IRBuilder<>(*::context); 144 145 #if defined(_WIN32) 146 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll"); 147 if(CodeAnalyst) 148 { 149 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize"); 150 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog"); 151 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode"); 152 153 CodeAnalystInitialize(); 154 } 155 #endif 156 } 157 } 158 159 Nucleus::~Nucleus() 160 { 161 delete ::executionEngine; 162 ::executionEngine = nullptr; 163 164 ::routineManager = nullptr; 165 ::function = nullptr; 166 ::module = nullptr; 167 168 ::codegenMutex.unlock(); 169 } 170 171 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations) 172 { 173 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator()) 174 { 175 llvm::Type *type = ::function->getReturnType(); 176 177 if(type->isVoidTy()) 178 { 179 createRetVoid(); 180 } 181 else 182 { 183 createRet(V(UndefValue::get(type))); 184 } 185 } 186 187 if(false) 188 { 189 std::string error; 190 raw_fd_ostream file("llvm-dump-unopt.txt", error); 191 ::module->print(file, 0); 192 } 193 194 if(runOptimizations) 195 { 196 optimize(); 197 } 198 199 if(false) 200 { 201 std::string error; 202 raw_fd_ostream file("llvm-dump-opt.txt", error); 203 ::module->print(file, 0); 204 } 205 206 void *entry = ::executionEngine->getPointerToFunction(::function); 207 LLVMRoutine *routine = ::routineManager->acquireRoutine(entry); 208 209 if(CodeAnalystLogJITCode) 210 { 211 
CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name); 212 } 213 214 return routine; 215 } 216 217 void Nucleus::optimize() 218 { 219 static PassManager *passManager = nullptr; 220 221 if(!passManager) 222 { 223 passManager = new PassManager(); 224 225 UnsafeFPMath = true; 226 // NoInfsFPMath = true; 227 // NoNaNsFPMath = true; 228 229 passManager->add(new TargetData(*::executionEngine->getTargetData())); 230 passManager->add(createScalarReplAggregatesPass()); 231 232 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++) 233 { 234 switch(optimization[pass]) 235 { 236 case Disabled: break; 237 case CFGSimplification: passManager->add(createCFGSimplificationPass()); break; 238 case LICM: passManager->add(createLICMPass()); break; 239 case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break; 240 case GVN: passManager->add(createGVNPass()); break; 241 case InstructionCombining: passManager->add(createInstructionCombiningPass()); break; 242 case Reassociate: passManager->add(createReassociatePass()); break; 243 case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break; 244 case SCCP: passManager->add(createSCCPPass()); break; 245 case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break; 246 default: 247 assert(false); 248 } 249 } 250 } 251 252 passManager->run(*::module); 253 } 254 255 Value *Nucleus::allocateStackVariable(Type *type, int arraySize) 256 { 257 // Need to allocate it in the entry block for mem2reg to work 258 llvm::BasicBlock &entryBlock = ::function->getEntryBlock(); 259 260 Instruction *declaration; 261 262 if(arraySize) 263 { 264 declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize)); 265 } 266 else 267 { 268 declaration = new AllocaInst(type, (Value*)0); 269 } 270 271 entryBlock.getInstList().push_front(declaration); 272 273 return V(declaration); 274 } 275 276 BasicBlock *Nucleus::createBasicBlock() 277 { 278 return 
B(BasicBlock::Create(*::context, "", ::function)); 279 } 280 281 BasicBlock *Nucleus::getInsertBlock() 282 { 283 return B(::builder->GetInsertBlock()); 284 } 285 286 void Nucleus::setInsertBlock(BasicBlock *basicBlock) 287 { 288 // assert(::builder->GetInsertBlock()->back().isTerminator()); 289 return ::builder->SetInsertPoint(basicBlock); 290 } 291 292 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params) 293 { 294 llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, T(Params), false); 295 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module); 296 ::function->setCallingConv(llvm::CallingConv::C); 297 298 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function)); 299 } 300 301 Value *Nucleus::getArgument(unsigned int index) 302 { 303 llvm::Function::arg_iterator args = ::function->arg_begin(); 304 305 while(index) 306 { 307 args++; 308 index--; 309 } 310 311 return V(&*args); 312 } 313 314 void Nucleus::createRetVoid() 315 { 316 x86::emms(); 317 318 ::builder->CreateRetVoid(); 319 } 320 321 void Nucleus::createRet(Value *v) 322 { 323 x86::emms(); 324 325 ::builder->CreateRet(v); 326 } 327 328 void Nucleus::createBr(BasicBlock *dest) 329 { 330 ::builder->CreateBr(dest); 331 } 332 333 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse) 334 { 335 ::builder->CreateCondBr(cond, ifTrue, ifFalse); 336 } 337 338 Value *Nucleus::createAdd(Value *lhs, Value *rhs) 339 { 340 return V(::builder->CreateAdd(lhs, rhs)); 341 } 342 343 Value *Nucleus::createSub(Value *lhs, Value *rhs) 344 { 345 return V(::builder->CreateSub(lhs, rhs)); 346 } 347 348 Value *Nucleus::createMul(Value *lhs, Value *rhs) 349 { 350 return V(::builder->CreateMul(lhs, rhs)); 351 } 352 353 Value *Nucleus::createUDiv(Value *lhs, Value *rhs) 354 { 355 return V(::builder->CreateUDiv(lhs, rhs)); 356 } 357 358 Value *Nucleus::createSDiv(Value *lhs, Value *rhs) 359 { 360 return 
V(::builder->CreateSDiv(lhs, rhs)); 361 } 362 363 Value *Nucleus::createFAdd(Value *lhs, Value *rhs) 364 { 365 return V(::builder->CreateFAdd(lhs, rhs)); 366 } 367 368 Value *Nucleus::createFSub(Value *lhs, Value *rhs) 369 { 370 return V(::builder->CreateFSub(lhs, rhs)); 371 } 372 373 Value *Nucleus::createFMul(Value *lhs, Value *rhs) 374 { 375 return V(::builder->CreateFMul(lhs, rhs)); 376 } 377 378 Value *Nucleus::createFDiv(Value *lhs, Value *rhs) 379 { 380 return V(::builder->CreateFDiv(lhs, rhs)); 381 } 382 383 Value *Nucleus::createURem(Value *lhs, Value *rhs) 384 { 385 return V(::builder->CreateURem(lhs, rhs)); 386 } 387 388 Value *Nucleus::createSRem(Value *lhs, Value *rhs) 389 { 390 return V(::builder->CreateSRem(lhs, rhs)); 391 } 392 393 Value *Nucleus::createFRem(Value *lhs, Value *rhs) 394 { 395 return V(::builder->CreateFRem(lhs, rhs)); 396 } 397 398 Value *Nucleus::createShl(Value *lhs, Value *rhs) 399 { 400 return V(::builder->CreateShl(lhs, rhs)); 401 } 402 403 Value *Nucleus::createLShr(Value *lhs, Value *rhs) 404 { 405 return V(::builder->CreateLShr(lhs, rhs)); 406 } 407 408 Value *Nucleus::createAShr(Value *lhs, Value *rhs) 409 { 410 return V(::builder->CreateAShr(lhs, rhs)); 411 } 412 413 Value *Nucleus::createAnd(Value *lhs, Value *rhs) 414 { 415 return V(::builder->CreateAnd(lhs, rhs)); 416 } 417 418 Value *Nucleus::createOr(Value *lhs, Value *rhs) 419 { 420 return V(::builder->CreateOr(lhs, rhs)); 421 } 422 423 Value *Nucleus::createXor(Value *lhs, Value *rhs) 424 { 425 return V(::builder->CreateXor(lhs, rhs)); 426 } 427 428 Value *Nucleus::createNeg(Value *v) 429 { 430 return V(::builder->CreateNeg(v)); 431 } 432 433 Value *Nucleus::createFNeg(Value *v) 434 { 435 return V(::builder->CreateFNeg(v)); 436 } 437 438 Value *Nucleus::createNot(Value *v) 439 { 440 return V(::builder->CreateNot(v)); 441 } 442 443 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align) 444 { 445 
assert(ptr->getType()->getContainedType(0) == type); 446 return V(::builder->Insert(new LoadInst(ptr, "", isVolatile, align))); 447 } 448 449 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align) 450 { 451 assert(ptr->getType()->getContainedType(0) == type); 452 ::builder->Insert(new StoreInst(value, ptr, isVolatile, align)); 453 return value; 454 } 455 456 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index) 457 { 458 assert(ptr->getType()->getContainedType(0) == type); 459 return V(::builder->CreateGEP(ptr, index)); 460 } 461 462 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value) 463 { 464 return V(::builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent)); 465 } 466 467 Value *Nucleus::createTrunc(Value *v, Type *destType) 468 { 469 return V(::builder->CreateTrunc(v, destType)); 470 } 471 472 Value *Nucleus::createZExt(Value *v, Type *destType) 473 { 474 return V(::builder->CreateZExt(v, destType)); 475 } 476 477 Value *Nucleus::createSExt(Value *v, Type *destType) 478 { 479 return V(::builder->CreateSExt(v, destType)); 480 } 481 482 Value *Nucleus::createFPToSI(Value *v, Type *destType) 483 { 484 return V(::builder->CreateFPToSI(v, destType)); 485 } 486 487 Value *Nucleus::createUIToFP(Value *v, Type *destType) 488 { 489 return V(::builder->CreateUIToFP(v, destType)); 490 } 491 492 Value *Nucleus::createSIToFP(Value *v, Type *destType) 493 { 494 return V(::builder->CreateSIToFP(v, destType)); 495 } 496 497 Value *Nucleus::createFPTrunc(Value *v, Type *destType) 498 { 499 return V(::builder->CreateFPTrunc(v, destType)); 500 } 501 502 Value *Nucleus::createFPExt(Value *v, Type *destType) 503 { 504 return V(::builder->CreateFPExt(v, destType)); 505 } 506 507 Value *Nucleus::createBitCast(Value *v, Type *destType) 508 { 509 return V(::builder->CreateBitCast(v, destType)); 510 } 511 512 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs) 513 { 514 return 
V(::builder->CreateICmpEQ(lhs, rhs)); 515 } 516 517 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs) 518 { 519 return V(::builder->CreateICmpNE(lhs, rhs)); 520 } 521 522 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs) 523 { 524 return V(::builder->CreateICmpUGT(lhs, rhs)); 525 } 526 527 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs) 528 { 529 return V(::builder->CreateICmpUGE(lhs, rhs)); 530 } 531 532 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs) 533 { 534 return V(::builder->CreateICmpULT(lhs, rhs)); 535 } 536 537 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs) 538 { 539 return V(::builder->CreateICmpULE(lhs, rhs)); 540 } 541 542 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs) 543 { 544 return V(::builder->CreateICmpSGT(lhs, rhs)); 545 } 546 547 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs) 548 { 549 return V(::builder->CreateICmpSGE(lhs, rhs)); 550 } 551 552 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs) 553 { 554 return V(::builder->CreateICmpSLT(lhs, rhs)); 555 } 556 557 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs) 558 { 559 return V(::builder->CreateICmpSLE(lhs, rhs)); 560 } 561 562 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs) 563 { 564 return V(::builder->CreateFCmpOEQ(lhs, rhs)); 565 } 566 567 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs) 568 { 569 return V(::builder->CreateFCmpOGT(lhs, rhs)); 570 } 571 572 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs) 573 { 574 return V(::builder->CreateFCmpOGE(lhs, rhs)); 575 } 576 577 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs) 578 { 579 return V(::builder->CreateFCmpOLT(lhs, rhs)); 580 } 581 582 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs) 583 { 584 return V(::builder->CreateFCmpOLE(lhs, rhs)); 585 } 586 587 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs) 588 { 589 return V(::builder->CreateFCmpONE(lhs, rhs)); 590 } 591 592 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs) 593 { 594 return 
V(::builder->CreateFCmpORD(lhs, rhs)); 595 } 596 597 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs) 598 { 599 return V(::builder->CreateFCmpUNO(lhs, rhs)); 600 } 601 602 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs) 603 { 604 return V(::builder->CreateFCmpUEQ(lhs, rhs)); 605 } 606 607 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs) 608 { 609 return V(::builder->CreateFCmpUGT(lhs, rhs)); 610 } 611 612 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs) 613 { 614 return V(::builder->CreateFCmpUGE(lhs, rhs)); 615 } 616 617 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs) 618 { 619 return V(::builder->CreateFCmpULT(lhs, rhs)); 620 } 621 622 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs) 623 { 624 return V(::builder->CreateFCmpULE(lhs, rhs)); 625 } 626 627 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs) 628 { 629 return V(::builder->CreateFCmpULE(lhs, rhs)); 630 } 631 632 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index) 633 { 634 assert(vector->getType()->getContainedType(0) == type); 635 return V(::builder->CreateExtractElement(vector, createConstantInt(index))); 636 } 637 638 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index) 639 { 640 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index))); 641 } 642 643 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select) 644 { 645 int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements(); 646 const int maxSize = 16; 647 llvm::Constant *swizzle[maxSize]; 648 assert(size <= maxSize); 649 650 for(int i = 0; i < size; i++) 651 { 652 swizzle[i] = llvm::ConstantInt::get(Type::getInt32Ty(*::context), select[i]); 653 } 654 655 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size)); 656 657 return V(::builder->CreateShuffleVector(V1, V2, shuffle)); 658 } 659 660 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse) 661 { 662 
return V(::builder->CreateSelect(C, ifTrue, ifFalse)); 663 } 664 665 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases) 666 { 667 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases)); 668 } 669 670 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch) 671 { 672 switchCases->addCase(llvm::ConstantInt::get(Type::getInt32Ty(*::context), label, true), branch); 673 } 674 675 void Nucleus::createUnreachable() 676 { 677 ::builder->CreateUnreachable(); 678 } 679 680 static Value *createSwizzle4(Value *val, unsigned char select) 681 { 682 int swizzle[4] = 683 { 684 (select >> 0) & 0x03, 685 (select >> 2) & 0x03, 686 (select >> 4) & 0x03, 687 (select >> 6) & 0x03, 688 }; 689 690 return Nucleus::createShuffleVector(val, val, swizzle); 691 } 692 693 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select) 694 { 695 bool mask[4] = {false, false, false, false}; 696 697 mask[(select >> 0) & 0x03] = true; 698 mask[(select >> 2) & 0x03] = true; 699 mask[(select >> 4) & 0x03] = true; 700 mask[(select >> 6) & 0x03] = true; 701 702 int swizzle[4] = 703 { 704 mask[0] ? 4 : 0, 705 mask[1] ? 5 : 1, 706 mask[2] ? 6 : 2, 707 mask[3] ? 
7 : 3, 708 }; 709 710 Value *shuffle = Nucleus::createShuffleVector(lhs, rhs, swizzle); 711 712 return shuffle; 713 } 714 715 Value *Nucleus::createConstantPointer(const void *address, Type *Ty, unsigned int align) 716 { 717 const GlobalValue *existingGlobal = ::executionEngine->getGlobalValueAtAddress(const_cast<void*>(address)); // FIXME: Const 718 719 if(existingGlobal) 720 { 721 return (Value*)existingGlobal; 722 } 723 724 llvm::GlobalValue *global = new llvm::GlobalVariable(*::module, Ty, true, llvm::GlobalValue::ExternalLinkage, 0, ""); 725 global->setAlignment(align); 726 727 ::executionEngine->addGlobalMapping(global, const_cast<void*>(address)); 728 729 return V(global); 730 } 731 732 Type *Nucleus::getPointerType(Type *ElementType) 733 { 734 return T(llvm::PointerType::get(ElementType, 0)); 735 } 736 737 Value *Nucleus::createNullValue(Type *Ty) 738 { 739 return V(llvm::Constant::getNullValue(Ty)); 740 } 741 742 Value *Nucleus::createConstantLong(int64_t i) 743 { 744 return V(llvm::ConstantInt::get(Type::getInt64Ty(*::context), i, true)); 745 } 746 747 Value *Nucleus::createConstantInt(int i) 748 { 749 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, true)); 750 } 751 752 Value *Nucleus::createConstantInt(unsigned int i) 753 { 754 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, false)); 755 } 756 757 Value *Nucleus::createConstantBool(bool b) 758 { 759 return V(llvm::ConstantInt::get(Type::getInt1Ty(*::context), b)); 760 } 761 762 Value *Nucleus::createConstantByte(signed char i) 763 { 764 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, true)); 765 } 766 767 Value *Nucleus::createConstantByte(unsigned char i) 768 { 769 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, false)); 770 } 771 772 Value *Nucleus::createConstantShort(short i) 773 { 774 return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, true)); 775 } 776 777 Value *Nucleus::createConstantShort(unsigned short i) 778 { 779 
return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, false)); 780 } 781 782 Value *Nucleus::createConstantFloat(float x) 783 { 784 return V(llvm::ConstantFP::get(Float::getType(), x)); 785 } 786 787 Value *Nucleus::createNullPointer(Type *Ty) 788 { 789 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0))); 790 } 791 792 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type) 793 { 794 assert(llvm::isa<VectorType>(type)); 795 const int numConstants = llvm::cast<VectorType>(type)->getNumElements(); 796 assert(numConstants <= 16); 797 llvm::Constant *constantVector[16]; 798 799 for(int i = 0; i < numConstants; i++) 800 { 801 constantVector[i] = llvm::ConstantInt::get(type->getContainedType(0), constants[i]); 802 } 803 804 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants))); 805 } 806 807 Value *Nucleus::createConstantVector(const double *constants, Type *type) 808 { 809 assert(llvm::isa<VectorType>(type)); 810 const int numConstants = llvm::cast<VectorType>(type)->getNumElements(); 811 assert(numConstants <= 8); 812 llvm::Constant *constantVector[8]; 813 814 for(int i = 0; i < numConstants; i++) 815 { 816 constantVector[i] = llvm::ConstantFP::get(type->getContainedType(0), constants[i]); 817 } 818 819 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants))); 820 } 821 822 Type *Void::getType() 823 { 824 return T(llvm::Type::getVoidTy(*::context)); 825 } 826 827 class MMX : public Variable<MMX> 828 { 829 public: 830 static Type *getType(); 831 }; 832 833 Type *MMX::getType() 834 { 835 return T(llvm::Type::getX86_MMXTy(*::context)); 836 } 837 838 Bool::Bool(Argument<Bool> argument) 839 { 840 storeValue(argument.value); 841 } 842 843 Bool::Bool() 844 { 845 } 846 847 Bool::Bool(bool x) 848 { 849 storeValue(Nucleus::createConstantBool(x)); 850 } 851 852 Bool::Bool(RValue<Bool> rhs) 853 { 854 storeValue(rhs.value); 855 } 856 857 
Bool::Bool(const Bool &rhs) 858 { 859 Value *value = rhs.loadValue(); 860 storeValue(value); 861 } 862 863 Bool::Bool(const Reference<Bool> &rhs) 864 { 865 Value *value = rhs.loadValue(); 866 storeValue(value); 867 } 868 869 RValue<Bool> Bool::operator=(RValue<Bool> rhs) const 870 { 871 storeValue(rhs.value); 872 873 return rhs; 874 } 875 876 RValue<Bool> Bool::operator=(const Bool &rhs) const 877 { 878 Value *value = rhs.loadValue(); 879 storeValue(value); 880 881 return RValue<Bool>(value); 882 } 883 884 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) const 885 { 886 Value *value = rhs.loadValue(); 887 storeValue(value); 888 889 return RValue<Bool>(value); 890 } 891 892 RValue<Bool> operator!(RValue<Bool> val) 893 { 894 return RValue<Bool>(Nucleus::createNot(val.value)); 895 } 896 897 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs) 898 { 899 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value)); 900 } 901 902 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs) 903 { 904 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value)); 905 } 906 907 Type *Bool::getType() 908 { 909 return T(llvm::Type::getInt1Ty(*::context)); 910 } 911 912 Byte::Byte(Argument<Byte> argument) 913 { 914 storeValue(argument.value); 915 } 916 917 Byte::Byte(RValue<Int> cast) 918 { 919 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 920 921 storeValue(integer); 922 } 923 924 Byte::Byte(RValue<UInt> cast) 925 { 926 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 927 928 storeValue(integer); 929 } 930 931 Byte::Byte(RValue<UShort> cast) 932 { 933 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType()); 934 935 storeValue(integer); 936 } 937 938 Byte::Byte() 939 { 940 } 941 942 Byte::Byte(int x) 943 { 944 storeValue(Nucleus::createConstantByte((unsigned char)x)); 945 } 946 947 Byte::Byte(unsigned char x) 948 { 949 storeValue(Nucleus::createConstantByte(x)); 950 } 951 952 Byte::Byte(RValue<Byte> rhs) 953 { 
954 storeValue(rhs.value); 955 } 956 957 Byte::Byte(const Byte &rhs) 958 { 959 Value *value = rhs.loadValue(); 960 storeValue(value); 961 } 962 963 Byte::Byte(const Reference<Byte> &rhs) 964 { 965 Value *value = rhs.loadValue(); 966 storeValue(value); 967 } 968 969 RValue<Byte> Byte::operator=(RValue<Byte> rhs) const 970 { 971 storeValue(rhs.value); 972 973 return rhs; 974 } 975 976 RValue<Byte> Byte::operator=(const Byte &rhs) const 977 { 978 Value *value = rhs.loadValue(); 979 storeValue(value); 980 981 return RValue<Byte>(value); 982 } 983 984 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) const 985 { 986 Value *value = rhs.loadValue(); 987 storeValue(value); 988 989 return RValue<Byte>(value); 990 } 991 992 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs) 993 { 994 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value)); 995 } 996 997 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs) 998 { 999 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value)); 1000 } 1001 1002 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs) 1003 { 1004 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value)); 1005 } 1006 1007 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs) 1008 { 1009 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value)); 1010 } 1011 1012 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs) 1013 { 1014 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value)); 1015 } 1016 1017 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs) 1018 { 1019 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value)); 1020 } 1021 1022 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs) 1023 { 1024 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value)); 1025 } 1026 1027 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs) 1028 { 1029 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value)); 1030 } 1031 1032 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs) 1033 { 
1034 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value)); 1035 } 1036 1037 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs) 1038 { 1039 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value)); 1040 } 1041 1042 RValue<Byte> operator+=(const Byte &lhs, RValue<Byte> rhs) 1043 { 1044 return lhs = lhs + rhs; 1045 } 1046 1047 RValue<Byte> operator-=(const Byte &lhs, RValue<Byte> rhs) 1048 { 1049 return lhs = lhs - rhs; 1050 } 1051 1052 RValue<Byte> operator*=(const Byte &lhs, RValue<Byte> rhs) 1053 { 1054 return lhs = lhs * rhs; 1055 } 1056 1057 RValue<Byte> operator/=(const Byte &lhs, RValue<Byte> rhs) 1058 { 1059 return lhs = lhs / rhs; 1060 } 1061 1062 RValue<Byte> operator%=(const Byte &lhs, RValue<Byte> rhs) 1063 { 1064 return lhs = lhs % rhs; 1065 } 1066 1067 RValue<Byte> operator&=(const Byte &lhs, RValue<Byte> rhs) 1068 { 1069 return lhs = lhs & rhs; 1070 } 1071 1072 RValue<Byte> operator|=(const Byte &lhs, RValue<Byte> rhs) 1073 { 1074 return lhs = lhs | rhs; 1075 } 1076 1077 RValue<Byte> operator^=(const Byte &lhs, RValue<Byte> rhs) 1078 { 1079 return lhs = lhs ^ rhs; 1080 } 1081 1082 RValue<Byte> operator<<=(const Byte &lhs, RValue<Byte> rhs) 1083 { 1084 return lhs = lhs << rhs; 1085 } 1086 1087 RValue<Byte> operator>>=(const Byte &lhs, RValue<Byte> rhs) 1088 { 1089 return lhs = lhs >> rhs; 1090 } 1091 1092 RValue<Byte> operator+(RValue<Byte> val) 1093 { 1094 return val; 1095 } 1096 1097 RValue<Byte> operator-(RValue<Byte> val) 1098 { 1099 return RValue<Byte>(Nucleus::createNeg(val.value)); 1100 } 1101 1102 RValue<Byte> operator~(RValue<Byte> val) 1103 { 1104 return RValue<Byte>(Nucleus::createNot(val.value)); 1105 } 1106 1107 RValue<Byte> operator++(const Byte &val, int) // Post-increment 1108 { 1109 RValue<Byte> res = val; 1110 1111 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1))); 1112 val.storeValue(inc); 1113 1114 return res; 1115 } 1116 1117 const Byte &operator++(const Byte &val) // 
Pre-increment 1118 { 1119 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1))); 1120 val.storeValue(inc); 1121 1122 return val; 1123 } 1124 1125 RValue<Byte> operator--(const Byte &val, int) // Post-decrement 1126 { 1127 RValue<Byte> res = val; 1128 1129 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1))); 1130 val.storeValue(inc); 1131 1132 return res; 1133 } 1134 1135 const Byte &operator--(const Byte &val) // Pre-decrement 1136 { 1137 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1))); 1138 val.storeValue(inc); 1139 1140 return val; 1141 } 1142 1143 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs) 1144 { 1145 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 1146 } 1147 1148 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs) 1149 { 1150 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 1151 } 1152 1153 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs) 1154 { 1155 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 1156 } 1157 1158 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs) 1159 { 1160 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 1161 } 1162 1163 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs) 1164 { 1165 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1166 } 1167 1168 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs) 1169 { 1170 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1171 } 1172 1173 Type *Byte::getType() 1174 { 1175 return T(llvm::Type::getInt8Ty(*::context)); 1176 } 1177 1178 SByte::SByte(Argument<SByte> argument) 1179 { 1180 storeValue(argument.value); 1181 } 1182 1183 SByte::SByte(RValue<Int> cast) 1184 { 1185 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1186 1187 storeValue(integer); 1188 } 1189 1190 
SByte::SByte(RValue<Short> cast) 1191 { 1192 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType()); 1193 1194 storeValue(integer); 1195 } 1196 1197 SByte::SByte() 1198 { 1199 } 1200 1201 SByte::SByte(signed char x) 1202 { 1203 storeValue(Nucleus::createConstantByte(x)); 1204 } 1205 1206 SByte::SByte(RValue<SByte> rhs) 1207 { 1208 storeValue(rhs.value); 1209 } 1210 1211 SByte::SByte(const SByte &rhs) 1212 { 1213 Value *value = rhs.loadValue(); 1214 storeValue(value); 1215 } 1216 1217 SByte::SByte(const Reference<SByte> &rhs) 1218 { 1219 Value *value = rhs.loadValue(); 1220 storeValue(value); 1221 } 1222 1223 RValue<SByte> SByte::operator=(RValue<SByte> rhs) const 1224 { 1225 storeValue(rhs.value); 1226 1227 return rhs; 1228 } 1229 1230 RValue<SByte> SByte::operator=(const SByte &rhs) const 1231 { 1232 Value *value = rhs.loadValue(); 1233 storeValue(value); 1234 1235 return RValue<SByte>(value); 1236 } 1237 1238 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) const 1239 { 1240 Value *value = rhs.loadValue(); 1241 storeValue(value); 1242 1243 return RValue<SByte>(value); 1244 } 1245 1246 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs) 1247 { 1248 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value)); 1249 } 1250 1251 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs) 1252 { 1253 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value)); 1254 } 1255 1256 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs) 1257 { 1258 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value)); 1259 } 1260 1261 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs) 1262 { 1263 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value)); 1264 } 1265 1266 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs) 1267 { 1268 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value)); 1269 } 1270 1271 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs) 1272 { 1273 return 
RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value)); 1274 } 1275 1276 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs) 1277 { 1278 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value)); 1279 } 1280 1281 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs) 1282 { 1283 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value)); 1284 } 1285 1286 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs) 1287 { 1288 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value)); 1289 } 1290 1291 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs) 1292 { 1293 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value)); 1294 } 1295 1296 RValue<SByte> operator+=(const SByte &lhs, RValue<SByte> rhs) 1297 { 1298 return lhs = lhs + rhs; 1299 } 1300 1301 RValue<SByte> operator-=(const SByte &lhs, RValue<SByte> rhs) 1302 { 1303 return lhs = lhs - rhs; 1304 } 1305 1306 RValue<SByte> operator*=(const SByte &lhs, RValue<SByte> rhs) 1307 { 1308 return lhs = lhs * rhs; 1309 } 1310 1311 RValue<SByte> operator/=(const SByte &lhs, RValue<SByte> rhs) 1312 { 1313 return lhs = lhs / rhs; 1314 } 1315 1316 RValue<SByte> operator%=(const SByte &lhs, RValue<SByte> rhs) 1317 { 1318 return lhs = lhs % rhs; 1319 } 1320 1321 RValue<SByte> operator&=(const SByte &lhs, RValue<SByte> rhs) 1322 { 1323 return lhs = lhs & rhs; 1324 } 1325 1326 RValue<SByte> operator|=(const SByte &lhs, RValue<SByte> rhs) 1327 { 1328 return lhs = lhs | rhs; 1329 } 1330 1331 RValue<SByte> operator^=(const SByte &lhs, RValue<SByte> rhs) 1332 { 1333 return lhs = lhs ^ rhs; 1334 } 1335 1336 RValue<SByte> operator<<=(const SByte &lhs, RValue<SByte> rhs) 1337 { 1338 return lhs = lhs << rhs; 1339 } 1340 1341 RValue<SByte> operator>>=(const SByte &lhs, RValue<SByte> rhs) 1342 { 1343 return lhs = lhs >> rhs; 1344 } 1345 1346 RValue<SByte> operator+(RValue<SByte> val) 1347 { 1348 return val; 1349 } 1350 1351 RValue<SByte> operator-(RValue<SByte> val) 1352 { 1353 return 
RValue<SByte>(Nucleus::createNeg(val.value)); 1354 } 1355 1356 RValue<SByte> operator~(RValue<SByte> val) 1357 { 1358 return RValue<SByte>(Nucleus::createNot(val.value)); 1359 } 1360 1361 RValue<SByte> operator++(const SByte &val, int) // Post-increment 1362 { 1363 RValue<SByte> res = val; 1364 1365 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1))); 1366 val.storeValue(inc); 1367 1368 return res; 1369 } 1370 1371 const SByte &operator++(const SByte &val) // Pre-increment 1372 { 1373 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1))); 1374 val.storeValue(inc); 1375 1376 return val; 1377 } 1378 1379 RValue<SByte> operator--(const SByte &val, int) // Post-decrement 1380 { 1381 RValue<SByte> res = val; 1382 1383 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1))); 1384 val.storeValue(inc); 1385 1386 return res; 1387 } 1388 1389 const SByte &operator--(const SByte &val) // Pre-decrement 1390 { 1391 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1))); 1392 val.storeValue(inc); 1393 1394 return val; 1395 } 1396 1397 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs) 1398 { 1399 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1400 } 1401 1402 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs) 1403 { 1404 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1405 } 1406 1407 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs) 1408 { 1409 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1410 } 1411 1412 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs) 1413 { 1414 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1415 } 1416 1417 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs) 1418 { 1419 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1420 } 1421 1422 RValue<Bool> 
operator==(RValue<SByte> lhs, RValue<SByte> rhs) 1423 { 1424 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1425 } 1426 1427 Type *SByte::getType() 1428 { 1429 return T(llvm::Type::getInt8Ty(*::context)); 1430 } 1431 1432 Short::Short(Argument<Short> argument) 1433 { 1434 storeValue(argument.value); 1435 } 1436 1437 Short::Short(RValue<Int> cast) 1438 { 1439 Value *integer = Nucleus::createTrunc(cast.value, Short::getType()); 1440 1441 storeValue(integer); 1442 } 1443 1444 Short::Short() 1445 { 1446 } 1447 1448 Short::Short(short x) 1449 { 1450 storeValue(Nucleus::createConstantShort(x)); 1451 } 1452 1453 Short::Short(RValue<Short> rhs) 1454 { 1455 storeValue(rhs.value); 1456 } 1457 1458 Short::Short(const Short &rhs) 1459 { 1460 Value *value = rhs.loadValue(); 1461 storeValue(value); 1462 } 1463 1464 Short::Short(const Reference<Short> &rhs) 1465 { 1466 Value *value = rhs.loadValue(); 1467 storeValue(value); 1468 } 1469 1470 RValue<Short> Short::operator=(RValue<Short> rhs) const 1471 { 1472 storeValue(rhs.value); 1473 1474 return rhs; 1475 } 1476 1477 RValue<Short> Short::operator=(const Short &rhs) const 1478 { 1479 Value *value = rhs.loadValue(); 1480 storeValue(value); 1481 1482 return RValue<Short>(value); 1483 } 1484 1485 RValue<Short> Short::operator=(const Reference<Short> &rhs) const 1486 { 1487 Value *value = rhs.loadValue(); 1488 storeValue(value); 1489 1490 return RValue<Short>(value); 1491 } 1492 1493 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs) 1494 { 1495 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value)); 1496 } 1497 1498 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs) 1499 { 1500 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value)); 1501 } 1502 1503 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs) 1504 { 1505 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value)); 1506 } 1507 1508 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs) 1509 { 1510 
return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value)); 1511 } 1512 1513 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs) 1514 { 1515 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value)); 1516 } 1517 1518 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs) 1519 { 1520 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value)); 1521 } 1522 1523 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs) 1524 { 1525 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value)); 1526 } 1527 1528 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs) 1529 { 1530 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value)); 1531 } 1532 1533 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs) 1534 { 1535 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value)); 1536 } 1537 1538 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs) 1539 { 1540 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value)); 1541 } 1542 1543 RValue<Short> operator+=(const Short &lhs, RValue<Short> rhs) 1544 { 1545 return lhs = lhs + rhs; 1546 } 1547 1548 RValue<Short> operator-=(const Short &lhs, RValue<Short> rhs) 1549 { 1550 return lhs = lhs - rhs; 1551 } 1552 1553 RValue<Short> operator*=(const Short &lhs, RValue<Short> rhs) 1554 { 1555 return lhs = lhs * rhs; 1556 } 1557 1558 RValue<Short> operator/=(const Short &lhs, RValue<Short> rhs) 1559 { 1560 return lhs = lhs / rhs; 1561 } 1562 1563 RValue<Short> operator%=(const Short &lhs, RValue<Short> rhs) 1564 { 1565 return lhs = lhs % rhs; 1566 } 1567 1568 RValue<Short> operator&=(const Short &lhs, RValue<Short> rhs) 1569 { 1570 return lhs = lhs & rhs; 1571 } 1572 1573 RValue<Short> operator|=(const Short &lhs, RValue<Short> rhs) 1574 { 1575 return lhs = lhs | rhs; 1576 } 1577 1578 RValue<Short> operator^=(const Short &lhs, RValue<Short> rhs) 1579 { 1580 return lhs = lhs ^ rhs; 1581 } 1582 1583 RValue<Short> operator<<=(const Short &lhs, RValue<Short> rhs) 
1584 { 1585 return lhs = lhs << rhs; 1586 } 1587 1588 RValue<Short> operator>>=(const Short &lhs, RValue<Short> rhs) 1589 { 1590 return lhs = lhs >> rhs; 1591 } 1592 1593 RValue<Short> operator+(RValue<Short> val) 1594 { 1595 return val; 1596 } 1597 1598 RValue<Short> operator-(RValue<Short> val) 1599 { 1600 return RValue<Short>(Nucleus::createNeg(val.value)); 1601 } 1602 1603 RValue<Short> operator~(RValue<Short> val) 1604 { 1605 return RValue<Short>(Nucleus::createNot(val.value)); 1606 } 1607 1608 RValue<Short> operator++(const Short &val, int) // Post-increment 1609 { 1610 RValue<Short> res = val; 1611 1612 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1))); 1613 val.storeValue(inc); 1614 1615 return res; 1616 } 1617 1618 const Short &operator++(const Short &val) // Pre-increment 1619 { 1620 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1))); 1621 val.storeValue(inc); 1622 1623 return val; 1624 } 1625 1626 RValue<Short> operator--(const Short &val, int) // Post-decrement 1627 { 1628 RValue<Short> res = val; 1629 1630 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1))); 1631 val.storeValue(inc); 1632 1633 return res; 1634 } 1635 1636 const Short &operator--(const Short &val) // Pre-decrement 1637 { 1638 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1))); 1639 val.storeValue(inc); 1640 1641 return val; 1642 } 1643 1644 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs) 1645 { 1646 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 1647 } 1648 1649 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs) 1650 { 1651 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 1652 } 1653 1654 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs) 1655 { 1656 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 1657 } 1658 1659 RValue<Bool> 
operator>=(RValue<Short> lhs, RValue<Short> rhs) 1660 { 1661 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 1662 } 1663 1664 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs) 1665 { 1666 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1667 } 1668 1669 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs) 1670 { 1671 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1672 } 1673 1674 Type *Short::getType() 1675 { 1676 return T(llvm::Type::getInt16Ty(*::context)); 1677 } 1678 1679 UShort::UShort(Argument<UShort> argument) 1680 { 1681 storeValue(argument.value); 1682 } 1683 1684 UShort::UShort(RValue<UInt> cast) 1685 { 1686 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1687 1688 storeValue(integer); 1689 } 1690 1691 UShort::UShort(RValue<Int> cast) 1692 { 1693 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType()); 1694 1695 storeValue(integer); 1696 } 1697 1698 UShort::UShort() 1699 { 1700 } 1701 1702 UShort::UShort(unsigned short x) 1703 { 1704 storeValue(Nucleus::createConstantShort(x)); 1705 } 1706 1707 UShort::UShort(RValue<UShort> rhs) 1708 { 1709 storeValue(rhs.value); 1710 } 1711 1712 UShort::UShort(const UShort &rhs) 1713 { 1714 Value *value = rhs.loadValue(); 1715 storeValue(value); 1716 } 1717 1718 UShort::UShort(const Reference<UShort> &rhs) 1719 { 1720 Value *value = rhs.loadValue(); 1721 storeValue(value); 1722 } 1723 1724 RValue<UShort> UShort::operator=(RValue<UShort> rhs) const 1725 { 1726 storeValue(rhs.value); 1727 1728 return rhs; 1729 } 1730 1731 RValue<UShort> UShort::operator=(const UShort &rhs) const 1732 { 1733 Value *value = rhs.loadValue(); 1734 storeValue(value); 1735 1736 return RValue<UShort>(value); 1737 } 1738 1739 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) const 1740 { 1741 Value *value = rhs.loadValue(); 1742 storeValue(value); 1743 1744 return RValue<UShort>(value); 1745 } 1746 1747 RValue<UShort> 
operator+(RValue<UShort> lhs, RValue<UShort> rhs) 1748 { 1749 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value)); 1750 } 1751 1752 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs) 1753 { 1754 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value)); 1755 } 1756 1757 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs) 1758 { 1759 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value)); 1760 } 1761 1762 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs) 1763 { 1764 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value)); 1765 } 1766 1767 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs) 1768 { 1769 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value)); 1770 } 1771 1772 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs) 1773 { 1774 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value)); 1775 } 1776 1777 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs) 1778 { 1779 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value)); 1780 } 1781 1782 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs) 1783 { 1784 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value)); 1785 } 1786 1787 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs) 1788 { 1789 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value)); 1790 } 1791 1792 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs) 1793 { 1794 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value)); 1795 } 1796 1797 RValue<UShort> operator+=(const UShort &lhs, RValue<UShort> rhs) 1798 { 1799 return lhs = lhs + rhs; 1800 } 1801 1802 RValue<UShort> operator-=(const UShort &lhs, RValue<UShort> rhs) 1803 { 1804 return lhs = lhs - rhs; 1805 } 1806 1807 RValue<UShort> operator*=(const UShort &lhs, RValue<UShort> rhs) 1808 { 1809 return lhs = lhs * rhs; 1810 } 1811 1812 RValue<UShort> operator/=(const UShort &lhs, RValue<UShort> rhs) 
1813 { 1814 return lhs = lhs / rhs; 1815 } 1816 1817 RValue<UShort> operator%=(const UShort &lhs, RValue<UShort> rhs) 1818 { 1819 return lhs = lhs % rhs; 1820 } 1821 1822 RValue<UShort> operator&=(const UShort &lhs, RValue<UShort> rhs) 1823 { 1824 return lhs = lhs & rhs; 1825 } 1826 1827 RValue<UShort> operator|=(const UShort &lhs, RValue<UShort> rhs) 1828 { 1829 return lhs = lhs | rhs; 1830 } 1831 1832 RValue<UShort> operator^=(const UShort &lhs, RValue<UShort> rhs) 1833 { 1834 return lhs = lhs ^ rhs; 1835 } 1836 1837 RValue<UShort> operator<<=(const UShort &lhs, RValue<UShort> rhs) 1838 { 1839 return lhs = lhs << rhs; 1840 } 1841 1842 RValue<UShort> operator>>=(const UShort &lhs, RValue<UShort> rhs) 1843 { 1844 return lhs = lhs >> rhs; 1845 } 1846 1847 RValue<UShort> operator+(RValue<UShort> val) 1848 { 1849 return val; 1850 } 1851 1852 RValue<UShort> operator-(RValue<UShort> val) 1853 { 1854 return RValue<UShort>(Nucleus::createNeg(val.value)); 1855 } 1856 1857 RValue<UShort> operator~(RValue<UShort> val) 1858 { 1859 return RValue<UShort>(Nucleus::createNot(val.value)); 1860 } 1861 1862 RValue<UShort> operator++(const UShort &val, int) // Post-increment 1863 { 1864 RValue<UShort> res = val; 1865 1866 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1))); 1867 val.storeValue(inc); 1868 1869 return res; 1870 } 1871 1872 const UShort &operator++(const UShort &val) // Pre-increment 1873 { 1874 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1))); 1875 val.storeValue(inc); 1876 1877 return val; 1878 } 1879 1880 RValue<UShort> operator--(const UShort &val, int) // Post-decrement 1881 { 1882 RValue<UShort> res = val; 1883 1884 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1))); 1885 val.storeValue(inc); 1886 1887 return res; 1888 } 1889 1890 const UShort &operator--(const UShort &val) // Pre-decrement 1891 { 1892 Value *inc = 
Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1))); 1893 val.storeValue(inc); 1894 1895 return val; 1896 } 1897 1898 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs) 1899 { 1900 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 1901 } 1902 1903 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs) 1904 { 1905 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 1906 } 1907 1908 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs) 1909 { 1910 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 1911 } 1912 1913 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs) 1914 { 1915 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 1916 } 1917 1918 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs) 1919 { 1920 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 1921 } 1922 1923 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs) 1924 { 1925 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 1926 } 1927 1928 Type *UShort::getType() 1929 { 1930 return T(llvm::Type::getInt16Ty(*::context)); 1931 } 1932 1933 Byte4::Byte4(RValue<Byte8> cast) 1934 { 1935 // xyzw.parent = this; 1936 1937 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), Int::getType())); 1938 } 1939 1940 Byte4::Byte4(const Reference<Byte4> &rhs) 1941 { 1942 // xyzw.parent = this; 1943 1944 Value *value = rhs.loadValue(); 1945 storeValue(value); 1946 } 1947 1948 Type *Byte4::getType() 1949 { 1950 #if 0 1951 return T(VectorType::get(Byte::getType(), 4)); 1952 #else 1953 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block 1954 #endif 1955 } 1956 1957 Type *SByte4::getType() 1958 { 1959 #if 0 1960 return T(VectorType::get(SByte::getType(), 4)); 1961 #else 1962 return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block 1963 #endif 1964 } 
1965 1966 Byte8::Byte8() 1967 { 1968 // xyzw.parent = this; 1969 } 1970 1971 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 1972 { 1973 // xyzw.parent = this; 1974 1975 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 1976 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Byte::getType(), 8)))); 1977 1978 storeValue(Nucleus::createBitCast(vector, getType())); 1979 } 1980 1981 Byte8::Byte8(RValue<Byte8> rhs) 1982 { 1983 // xyzw.parent = this; 1984 1985 storeValue(rhs.value); 1986 } 1987 1988 Byte8::Byte8(const Byte8 &rhs) 1989 { 1990 // xyzw.parent = this; 1991 1992 Value *value = rhs.loadValue(); 1993 storeValue(value); 1994 } 1995 1996 Byte8::Byte8(const Reference<Byte8> &rhs) 1997 { 1998 // xyzw.parent = this; 1999 2000 Value *value = rhs.loadValue(); 2001 storeValue(value); 2002 } 2003 2004 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) const 2005 { 2006 storeValue(rhs.value); 2007 2008 return rhs; 2009 } 2010 2011 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const 2012 { 2013 Value *value = rhs.loadValue(); 2014 storeValue(value); 2015 2016 return RValue<Byte8>(value); 2017 } 2018 2019 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) const 2020 { 2021 Value *value = rhs.loadValue(); 2022 storeValue(value); 2023 2024 return RValue<Byte8>(value); 2025 } 2026 2027 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs) 2028 { 2029 if(CPUID::supportsMMX2()) 2030 { 2031 return x86::paddb(lhs, rhs); 2032 } 2033 else 2034 { 2035 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2036 } 2037 } 2038 2039 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs) 2040 { 2041 if(CPUID::supportsMMX2()) 2042 { 2043 return x86::psubb(lhs, rhs); 2044 } 2045 else 2046 { 2047 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value)); 2048 } 2049 } 2050 2051// RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs) 2052// { 
2053// return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value)); 2054// } 2055 2056// RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs) 2057// { 2058// return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value)); 2059// } 2060 2061// RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs) 2062// { 2063// return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value)); 2064// } 2065 2066 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs) 2067 { 2068 if(CPUID::supportsMMX2()) 2069 { 2070 return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs))); 2071 } 2072 else 2073 { 2074 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2075 } 2076 } 2077 2078 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs) 2079 { 2080 if(CPUID::supportsMMX2()) 2081 { 2082 return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs))); 2083 } 2084 else 2085 { 2086 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value)); 2087 } 2088 } 2089 2090 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs) 2091 { 2092 if(CPUID::supportsMMX2()) 2093 { 2094 return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs))); 2095 } 2096 else 2097 { 2098 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value)); 2099 } 2100 } 2101 2102// RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs) 2103// { 2104// return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value)); 2105// } 2106 2107// RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs) 2108// { 2109// return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value)); 2110// } 2111 2112 RValue<Byte8> operator+=(const Byte8 &lhs, RValue<Byte8> rhs) 2113 { 2114 return lhs = lhs + rhs; 2115 } 2116 2117 RValue<Byte8> operator-=(const Byte8 &lhs, RValue<Byte8> rhs) 2118 { 2119 return lhs = lhs - rhs; 2120 } 2121 2122// RValue<Byte8> operator*=(const Byte8 &lhs, RValue<Byte8> rhs) 2123// { 2124// return lhs = lhs * rhs; 2125// } 2126 2127// RValue<Byte8> 
operator/=(const Byte8 &lhs, RValue<Byte8> rhs) 2128// { 2129// return lhs = lhs / rhs; 2130// } 2131 2132// RValue<Byte8> operator%=(const Byte8 &lhs, RValue<Byte8> rhs) 2133// { 2134// return lhs = lhs % rhs; 2135// } 2136 2137 RValue<Byte8> operator&=(const Byte8 &lhs, RValue<Byte8> rhs) 2138 { 2139 return lhs = lhs & rhs; 2140 } 2141 2142 RValue<Byte8> operator|=(const Byte8 &lhs, RValue<Byte8> rhs) 2143 { 2144 return lhs = lhs | rhs; 2145 } 2146 2147 RValue<Byte8> operator^=(const Byte8 &lhs, RValue<Byte8> rhs) 2148 { 2149 return lhs = lhs ^ rhs; 2150 } 2151 2152// RValue<Byte8> operator<<=(const Byte8 &lhs, RValue<Byte8> rhs) 2153// { 2154// return lhs = lhs << rhs; 2155// } 2156 2157// RValue<Byte8> operator>>=(const Byte8 &lhs, RValue<Byte8> rhs) 2158// { 2159// return lhs = lhs >> rhs; 2160// } 2161 2162// RValue<Byte8> operator+(RValue<Byte8> val) 2163// { 2164// return val; 2165// } 2166 2167// RValue<Byte8> operator-(RValue<Byte8> val) 2168// { 2169// return RValue<Byte8>(Nucleus::createNeg(val.value)); 2170// } 2171 2172 RValue<Byte8> operator~(RValue<Byte8> val) 2173 { 2174 if(CPUID::supportsMMX2()) 2175 { 2176 return val ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 2177 } 2178 else 2179 { 2180 return RValue<Byte8>(Nucleus::createNot(val.value)); 2181 } 2182 } 2183 2184 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y) 2185 { 2186 return x86::paddusb(x, y); 2187 } 2188 2189 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y) 2190 { 2191 return x86::psubusb(x, y); 2192 } 2193 2194 RValue<Short4> Unpack(RValue<Byte4> x) 2195 { 2196 Value *int2 = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0); 2197 Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType()); 2198 2199 return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8)); 2200 } 2201 2202 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y) 2203 { 2204 if(CPUID::supportsMMX2()) 2205 { 2206 return x86::punpcklbw(x, y); 
2207 } 2208 else 2209 { 2210 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 2211 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 2212 2213 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2214 } 2215 } 2216 2217 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y) 2218 { 2219 if(CPUID::supportsMMX2()) 2220 { 2221 return x86::punpckhbw(x, y); 2222 } 2223 else 2224 { 2225 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15}; 2226 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 2227 2228 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2229 } 2230 } 2231 2232 RValue<Int> SignMask(RValue<Byte8> x) 2233 { 2234 return x86::pmovmskb(x); 2235 } 2236 2237// RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y) 2238// { 2239// return x86::pcmpgtb(x, y); // FIXME: Signedness 2240// } 2241 2242 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y) 2243 { 2244 return x86::pcmpeqb(x, y); 2245 } 2246 2247 Type *Byte8::getType() 2248 { 2249 if(CPUID::supportsMMX2()) 2250 { 2251 return MMX::getType(); 2252 } 2253 else 2254 { 2255 return T(VectorType::get(Byte::getType(), 8)); 2256 } 2257 } 2258 2259 SByte8::SByte8() 2260 { 2261 // xyzw.parent = this; 2262 } 2263 2264 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) 2265 { 2266 // xyzw.parent = this; 2267 2268 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 2269 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(SByte::getType(), 8)))); 2270 2271 storeValue(Nucleus::createBitCast(vector, getType())); 2272 } 2273 2274 SByte8::SByte8(RValue<SByte8> rhs) 2275 { 2276 // xyzw.parent = this; 2277 2278 storeValue(rhs.value); 2279 } 2280 2281 SByte8::SByte8(const SByte8 &rhs) 2282 { 2283 // xyzw.parent = this; 2284 2285 Value *value = rhs.loadValue(); 2286 storeValue(value); 2287 } 2288 2289 SByte8::SByte8(const Reference<SByte8> &rhs) 
2290 { 2291 // xyzw.parent = this; 2292 2293 Value *value = rhs.loadValue(); 2294 storeValue(value); 2295 } 2296 2297 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) const 2298 { 2299 storeValue(rhs.value); 2300 2301 return rhs; 2302 } 2303 2304 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const 2305 { 2306 Value *value = rhs.loadValue(); 2307 storeValue(value); 2308 2309 return RValue<SByte8>(value); 2310 } 2311 2312 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) const 2313 { 2314 Value *value = rhs.loadValue(); 2315 storeValue(value); 2316 2317 return RValue<SByte8>(value); 2318 } 2319 2320 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs) 2321 { 2322 if(CPUID::supportsMMX2()) 2323 { 2324 return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs))); 2325 } 2326 else 2327 { 2328 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value)); 2329 } 2330 } 2331 2332 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs) 2333 { 2334 if(CPUID::supportsMMX2()) 2335 { 2336 return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs))); 2337 } 2338 else 2339 { 2340 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value)); 2341 } 2342 } 2343 2344// RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs) 2345// { 2346// return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value)); 2347// } 2348 2349// RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs) 2350// { 2351// return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value)); 2352// } 2353 2354// RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs) 2355// { 2356// return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value)); 2357// } 2358 2359 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs) 2360 { 2361 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value)); 2362 } 2363 2364 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs) 2365 { 2366 return 
RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value)); 2367 } 2368 2369 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs) 2370 { 2371 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value)); 2372 } 2373 2374// RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs) 2375// { 2376// return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value)); 2377// } 2378 2379// RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs) 2380// { 2381// return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value)); 2382// } 2383 2384 RValue<SByte8> operator+=(const SByte8 &lhs, RValue<SByte8> rhs) 2385 { 2386 return lhs = lhs + rhs; 2387 } 2388 2389 RValue<SByte8> operator-=(const SByte8 &lhs, RValue<SByte8> rhs) 2390 { 2391 return lhs = lhs - rhs; 2392 } 2393 2394// RValue<SByte8> operator*=(const SByte8 &lhs, RValue<SByte8> rhs) 2395// { 2396// return lhs = lhs * rhs; 2397// } 2398 2399// RValue<SByte8> operator/=(const SByte8 &lhs, RValue<SByte8> rhs) 2400// { 2401// return lhs = lhs / rhs; 2402// } 2403 2404// RValue<SByte8> operator%=(const SByte8 &lhs, RValue<SByte8> rhs) 2405// { 2406// return lhs = lhs % rhs; 2407// } 2408 2409 RValue<SByte8> operator&=(const SByte8 &lhs, RValue<SByte8> rhs) 2410 { 2411 return lhs = lhs & rhs; 2412 } 2413 2414 RValue<SByte8> operator|=(const SByte8 &lhs, RValue<SByte8> rhs) 2415 { 2416 return lhs = lhs | rhs; 2417 } 2418 2419 RValue<SByte8> operator^=(const SByte8 &lhs, RValue<SByte8> rhs) 2420 { 2421 return lhs = lhs ^ rhs; 2422 } 2423 2424// RValue<SByte8> operator<<=(const SByte8 &lhs, RValue<SByte8> rhs) 2425// { 2426// return lhs = lhs << rhs; 2427// } 2428 2429// RValue<SByte8> operator>>=(const SByte8 &lhs, RValue<SByte8> rhs) 2430// { 2431// return lhs = lhs >> rhs; 2432// } 2433 2434// RValue<SByte8> operator+(RValue<SByte8> val) 2435// { 2436// return val; 2437// } 2438 2439// RValue<SByte8> operator-(RValue<SByte8> val) 2440// { 2441// return 
RValue<SByte8>(Nucleus::createNeg(val.value)); 2442// } 2443 2444 RValue<SByte8> operator~(RValue<SByte8> val) 2445 { 2446 if(CPUID::supportsMMX2()) 2447 { 2448 return val ^ SByte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 2449 } 2450 else 2451 { 2452 return RValue<SByte8>(Nucleus::createNot(val.value)); 2453 } 2454 } 2455 2456 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y) 2457 { 2458 return x86::paddsb(x, y); 2459 } 2460 2461 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y) 2462 { 2463 return x86::psubsb(x, y); 2464 } 2465 2466 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y) 2467 { 2468 if(CPUID::supportsMMX2()) 2469 { 2470 return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y))); 2471 } 2472 else 2473 { 2474 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 2475 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 2476 2477 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2478 } 2479 } 2480 2481 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y) 2482 { 2483 if(CPUID::supportsMMX2()) 2484 { 2485 return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y))); 2486 } 2487 else 2488 { 2489 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15}; 2490 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 2491 2492 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType())); 2493 } 2494 } 2495 2496 RValue<Int> SignMask(RValue<SByte8> x) 2497 { 2498 return x86::pmovmskb(As<Byte8>(x)); 2499 } 2500 2501 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y) 2502 { 2503 return x86::pcmpgtb(x, y); 2504 } 2505 2506 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y) 2507 { 2508 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y)); 2509 } 2510 2511 Type *SByte8::getType() 2512 { 2513 if(CPUID::supportsMMX2()) 2514 { 2515 return MMX::getType(); 2516 } 2517 else 2518 { 2519 return T(VectorType::get(SByte::getType(), 8)); 2520 } 2521 } 2522 2523 
Byte16::Byte16(RValue<Byte16> rhs) 2524 { 2525 // xyzw.parent = this; 2526 2527 storeValue(rhs.value); 2528 } 2529 2530 Byte16::Byte16(const Byte16 &rhs) 2531 { 2532 // xyzw.parent = this; 2533 2534 Value *value = rhs.loadValue(); 2535 storeValue(value); 2536 } 2537 2538 Byte16::Byte16(const Reference<Byte16> &rhs) 2539 { 2540 // xyzw.parent = this; 2541 2542 Value *value = rhs.loadValue(); 2543 storeValue(value); 2544 } 2545 2546 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) const 2547 { 2548 storeValue(rhs.value); 2549 2550 return rhs; 2551 } 2552 2553 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const 2554 { 2555 Value *value = rhs.loadValue(); 2556 storeValue(value); 2557 2558 return RValue<Byte16>(value); 2559 } 2560 2561 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) const 2562 { 2563 Value *value = rhs.loadValue(); 2564 storeValue(value); 2565 2566 return RValue<Byte16>(value); 2567 } 2568 2569 Type *Byte16::getType() 2570 { 2571 return T(VectorType::get(Byte::getType(), 16)); 2572 } 2573 2574 Type *SByte16::getType() 2575 { 2576 return T( VectorType::get(SByte::getType(), 16)); 2577 } 2578 2579 Short2::Short2(RValue<Short4> cast) 2580 { 2581 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType())); 2582 } 2583 2584 Type *Short2::getType() 2585 { 2586 #if 0 2587 return T(VectorType::get(Short::getType(), 2)); 2588 #else 2589 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block 2590 #endif 2591 } 2592 2593 UShort2::UShort2(RValue<UShort4> cast) 2594 { 2595 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType())); 2596 } 2597 2598 Type *UShort2::getType() 2599 { 2600 #if 0 2601 return T(VectorType::get(UShort::getType(), 2)); 2602 #else 2603 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block 2604 #endif 2605 } 2606 2607 Short4::Short4(RValue<Int> cast) 2608 { 2609 Value *extend = 
Nucleus::createZExt(cast.value, Long::getType()); 2610 Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value; 2611 2612 storeValue(swizzle); 2613 } 2614 2615 Short4::Short4(RValue<Int4> cast) 2616 { 2617 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType()); 2618 2619 #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd) 2620 Constant *pack[8]; 2621 pack[0] = Nucleus::createConstantInt(0); 2622 pack[1] = Nucleus::createConstantInt(2); 2623 pack[2] = Nucleus::createConstantInt(4); 2624 pack[3] = Nucleus::createConstantInt(6); 2625 2626 Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4)); 2627 #else 2628 Value *packed; 2629 2630 // FIXME: Use Swizzle<Short8> 2631 if(!CPUID::supportsSSSE3()) 2632 { 2633 int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7}; 2634 int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6}; 2635 2636 Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw); 2637 Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw); 2638 Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType()); 2639 packed = createSwizzle4(int4, 0x88); 2640 } 2641 else 2642 { 2643 int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}; 2644 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType()); 2645 packed = Nucleus::createShuffleVector(byte16, byte16, pshufb); 2646 } 2647 2648 #if 0 // FIXME: No optimal instruction selection 2649 Value *qword2 = Nucleus::createBitCast(packed, T(VectorType::get(Long::getType(), 2))); 2650 Value *element = Nucleus::createExtractElement(qword2, 0); 2651 Value *short4 = Nucleus::createBitCast(element, Short4::getType()); 2652 #else // FIXME: Requires SSE 2653 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value; 2654 Value *short4 = Nucleus::createBitCast(int2, Short4::getType()); 2655 #endif 2656 #endif 2657 2658 storeValue(short4); 2659 } 2660 2661// Short4::Short4(RValue<Float> cast) 2662// { 2663// } 2664 2665 
Short4::Short4(RValue<Float4> cast) 2666 { 2667 Int4 v4i32 = Int4(cast); 2668 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32)); 2669 2670 storeValue(As<Short4>(Int2(v4i32)).value); 2671 } 2672 2673 Short4::Short4() 2674 { 2675 // xyzw.parent = this; 2676 } 2677 2678 Short4::Short4(short xyzw) 2679 { 2680 // xyzw.parent = this; 2681 2682 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; 2683 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4)))); 2684 2685 storeValue(Nucleus::createBitCast(vector, getType())); 2686 } 2687 2688 Short4::Short4(short x, short y, short z, short w) 2689 { 2690 // xyzw.parent = this; 2691 2692 int64_t constantVector[4] = {x, y, z, w}; 2693 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4)))); 2694 2695 storeValue(Nucleus::createBitCast(vector, getType())); 2696 } 2697 2698 Short4::Short4(RValue<Short4> rhs) 2699 { 2700 // xyzw.parent = this; 2701 2702 storeValue(rhs.value); 2703 } 2704 2705 Short4::Short4(const Short4 &rhs) 2706 { 2707 // xyzw.parent = this; 2708 2709 Value *value = rhs.loadValue(); 2710 storeValue(value); 2711 } 2712 2713 Short4::Short4(const Reference<Short4> &rhs) 2714 { 2715 // xyzw.parent = this; 2716 2717 Value *value = rhs.loadValue(); 2718 storeValue(value); 2719 } 2720 2721 Short4::Short4(RValue<UShort4> rhs) 2722 { 2723 // xyzw.parent = this; 2724 2725 storeValue(rhs.value); 2726 } 2727 2728 Short4::Short4(const UShort4 &rhs) 2729 { 2730 // xyzw.parent = this; 2731 2732 storeValue(rhs.loadValue()); 2733 } 2734 2735 Short4::Short4(const Reference<UShort4> &rhs) 2736 { 2737 // xyzw.parent = this; 2738 2739 storeValue(rhs.loadValue()); 2740 } 2741 2742 RValue<Short4> Short4::operator=(RValue<Short4> rhs) const 2743 { 2744 storeValue(rhs.value); 2745 2746 return rhs; 2747 } 2748 2749 RValue<Short4> Short4::operator=(const Short4 &rhs) const 2750 { 2751 Value *value = rhs.loadValue(); 2752 storeValue(value); 2753 
2754 return RValue<Short4>(value); 2755 } 2756 2757 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) const 2758 { 2759 Value *value = rhs.loadValue(); 2760 storeValue(value); 2761 2762 return RValue<Short4>(value); 2763 } 2764 2765 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) const 2766 { 2767 storeValue(rhs.value); 2768 2769 return RValue<Short4>(rhs); 2770 } 2771 2772 RValue<Short4> Short4::operator=(const UShort4 &rhs) const 2773 { 2774 Value *value = rhs.loadValue(); 2775 storeValue(value); 2776 2777 return RValue<Short4>(value); 2778 } 2779 2780 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) const 2781 { 2782 Value *value = rhs.loadValue(); 2783 storeValue(value); 2784 2785 return RValue<Short4>(value); 2786 } 2787 2788 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs) 2789 { 2790 if(CPUID::supportsMMX2()) 2791 { 2792 return x86::paddw(lhs, rhs); 2793 } 2794 else 2795 { 2796 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value)); 2797 } 2798 } 2799 2800 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs) 2801 { 2802 if(CPUID::supportsMMX2()) 2803 { 2804 return x86::psubw(lhs, rhs); 2805 } 2806 else 2807 { 2808 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value)); 2809 } 2810 } 2811 2812 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs) 2813 { 2814 if(CPUID::supportsMMX2()) 2815 { 2816 return x86::pmullw(lhs, rhs); 2817 } 2818 else 2819 { 2820 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value)); 2821 } 2822 } 2823 2824// RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs) 2825// { 2826// return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value)); 2827// } 2828 2829// RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs) 2830// { 2831// return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value)); 2832// } 2833 2834 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs) 2835 { 2836 
if(CPUID::supportsMMX2()) 2837 { 2838 return x86::pand(lhs, rhs); 2839 } 2840 else 2841 { 2842 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value)); 2843 } 2844 } 2845 2846 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs) 2847 { 2848 if(CPUID::supportsMMX2()) 2849 { 2850 return x86::por(lhs, rhs); 2851 } 2852 else 2853 { 2854 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value)); 2855 } 2856 } 2857 2858 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs) 2859 { 2860 if(CPUID::supportsMMX2()) 2861 { 2862 return x86::pxor(lhs, rhs); 2863 } 2864 else 2865 { 2866 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value)); 2867 } 2868 } 2869 2870 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs) 2871 { 2872 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 2873 2874 return x86::psllw(lhs, rhs); 2875 } 2876 2877 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs) 2878 { 2879 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value)); 2880 2881 return x86::psraw(lhs, rhs); 2882 } 2883 2884 RValue<Short4> operator<<(RValue<Short4> lhs, RValue<Long1> rhs) 2885 { 2886 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 2887 2888 return x86::psllw(lhs, rhs); 2889 } 2890 2891 RValue<Short4> operator>>(RValue<Short4> lhs, RValue<Long1> rhs) 2892 { 2893 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value)); 2894 2895 return x86::psraw(lhs, rhs); 2896 } 2897 2898 RValue<Short4> operator+=(const Short4 &lhs, RValue<Short4> rhs) 2899 { 2900 return lhs = lhs + rhs; 2901 } 2902 2903 RValue<Short4> operator-=(const Short4 &lhs, RValue<Short4> rhs) 2904 { 2905 return lhs = lhs - rhs; 2906 } 2907 2908 RValue<Short4> operator*=(const Short4 &lhs, RValue<Short4> rhs) 2909 { 2910 return lhs = lhs * rhs; 2911 } 2912 2913// RValue<Short4> operator/=(const Short4 &lhs, RValue<Short4> rhs) 2914// { 2915// return lhs = lhs / rhs; 2916// } 2917 2918// 
RValue<Short4> operator%=(const Short4 &lhs, RValue<Short4> rhs) 2919// { 2920// return lhs = lhs % rhs; 2921// } 2922 2923 RValue<Short4> operator&=(const Short4 &lhs, RValue<Short4> rhs) 2924 { 2925 return lhs = lhs & rhs; 2926 } 2927 2928 RValue<Short4> operator|=(const Short4 &lhs, RValue<Short4> rhs) 2929 { 2930 return lhs = lhs | rhs; 2931 } 2932 2933 RValue<Short4> operator^=(const Short4 &lhs, RValue<Short4> rhs) 2934 { 2935 return lhs = lhs ^ rhs; 2936 } 2937 2938 RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs) 2939 { 2940 return lhs = lhs << rhs; 2941 } 2942 2943 RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs) 2944 { 2945 return lhs = lhs >> rhs; 2946 } 2947 2948 RValue<Short4> operator<<=(const Short4 &lhs, RValue<Long1> rhs) 2949 { 2950 return lhs = lhs << rhs; 2951 } 2952 2953 RValue<Short4> operator>>=(const Short4 &lhs, RValue<Long1> rhs) 2954 { 2955 return lhs = lhs >> rhs; 2956 } 2957 2958// RValue<Short4> operator+(RValue<Short4> val) 2959// { 2960// return val; 2961// } 2962 2963 RValue<Short4> operator-(RValue<Short4> val) 2964 { 2965 if(CPUID::supportsMMX2()) 2966 { 2967 return Short4(0, 0, 0, 0) - val; 2968 } 2969 else 2970 { 2971 return RValue<Short4>(Nucleus::createNeg(val.value)); 2972 } 2973 } 2974 2975 RValue<Short4> operator~(RValue<Short4> val) 2976 { 2977 if(CPUID::supportsMMX2()) 2978 { 2979 return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu); 2980 } 2981 else 2982 { 2983 return RValue<Short4>(Nucleus::createNot(val.value)); 2984 } 2985 } 2986 2987 RValue<Short4> RoundShort4(RValue<Float4> cast) 2988 { 2989 RValue<Int4> v4i32 = x86::cvtps2dq(cast); 2990 RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32); 2991 2992 return As<Short4>(Int2(As<Int4>(v8i16))); 2993 } 2994 2995 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y) 2996 { 2997 return x86::pmaxsw(x, y); 2998 } 2999 3000 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y) 3001 { 3002 return x86::pminsw(x, y); 3003 } 3004 3005 
RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y) 3006 { 3007 return x86::paddsw(x, y); 3008 } 3009 3010 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y) 3011 { 3012 return x86::psubsw(x, y); 3013 } 3014 3015 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y) 3016 { 3017 return x86::pmulhw(x, y); 3018 } 3019 3020 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y) 3021 { 3022 return x86::pmaddwd(x, y); 3023 } 3024 3025 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y) 3026 { 3027 return x86::packsswb(x, y); 3028 } 3029 3030 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y) 3031 { 3032 if(CPUID::supportsMMX2()) 3033 { 3034 return x86::punpcklwd(x, y); 3035 } 3036 else 3037 { 3038 int shuffle[4] = {0, 4, 1, 5}; 3039 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 3040 3041 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType())); 3042 } 3043 } 3044 3045 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y) 3046 { 3047 if(CPUID::supportsMMX2()) 3048 { 3049 return x86::punpckhwd(x, y); 3050 } 3051 else 3052 { 3053 int shuffle[4] = {2, 6, 3, 7}; 3054 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 3055 3056 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType())); 3057 } 3058 } 3059 3060 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select) 3061 { 3062 if(CPUID::supportsMMX2()) 3063 { 3064 return x86::pshufw(x, select); 3065 } 3066 else 3067 { 3068 return RValue<Short4>(createSwizzle4(x.value, select)); 3069 } 3070 } 3071 3072 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i) 3073 { 3074 if(CPUID::supportsMMX2()) 3075 { 3076 return x86::pinsrw(val, Int(element), i); 3077 } 3078 else 3079 { 3080 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i)); 3081 } 3082 } 3083 3084 RValue<Short> Extract(RValue<Short4> val, int i) 3085 { 3086 if(CPUID::supportsMMX2()) 3087 { 3088 return 
Short(x86::pextrw(val, i)); 3089 } 3090 else 3091 { 3092 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i)); 3093 } 3094 } 3095 3096 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y) 3097 { 3098 return x86::pcmpgtw(x, y); 3099 } 3100 3101 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y) 3102 { 3103 return x86::pcmpeqw(x, y); 3104 } 3105 3106 Type *Short4::getType() 3107 { 3108 if(CPUID::supportsMMX2()) 3109 { 3110 return MMX::getType(); 3111 } 3112 else 3113 { 3114 return T(VectorType::get(Short::getType(), 4)); 3115 } 3116 } 3117 3118 UShort4::UShort4(RValue<Int4> cast) 3119 { 3120 *this = Short4(cast); 3121 } 3122 3123 UShort4::UShort4(RValue<Float4> cast, bool saturate) 3124 { 3125 Float4 sat; 3126 3127 if(saturate) 3128 { 3129 if(CPUID::supportsSSE4_1()) 3130 { 3131 sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation 3132 } 3133 else 3134 { 3135 sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000)); 3136 } 3137 } 3138 else 3139 { 3140 sat = cast; 3141 } 3142 3143 Int4 int4(sat); 3144 3145 if(!saturate || !CPUID::supportsSSE4_1()) 3146 { 3147 *this = Short4(Int4(int4)); 3148 } 3149 else 3150 { 3151 *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4))))); 3152 } 3153 } 3154 3155 UShort4::UShort4() 3156 { 3157 // xyzw.parent = this; 3158 } 3159 3160 UShort4::UShort4(unsigned short xyzw) 3161 { 3162 // xyzw.parent = this; 3163 3164 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw}; 3165 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4)))); 3166 3167 storeValue(Nucleus::createBitCast(vector, getType())); 3168 } 3169 3170 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) 3171 { 3172 // xyzw.parent = this; 3173 3174 int64_t constantVector[4] = {x, y, z, w}; 3175 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4)))); 3176 
3177 storeValue(Nucleus::createBitCast(vector, getType())); 3178 } 3179 3180 UShort4::UShort4(RValue<UShort4> rhs) 3181 { 3182 // xyzw.parent = this; 3183 3184 storeValue(rhs.value); 3185 } 3186 3187 UShort4::UShort4(const UShort4 &rhs) 3188 { 3189 // xyzw.parent = this; 3190 3191 Value *value = rhs.loadValue(); 3192 storeValue(value); 3193 } 3194 3195 UShort4::UShort4(const Reference<UShort4> &rhs) 3196 { 3197 // xyzw.parent = this; 3198 3199 Value *value = rhs.loadValue(); 3200 storeValue(value); 3201 } 3202 3203 UShort4::UShort4(RValue<Short4> rhs) 3204 { 3205 // xyzw.parent = this; 3206 3207 storeValue(rhs.value); 3208 } 3209 3210 UShort4::UShort4(const Short4 &rhs) 3211 { 3212 // xyzw.parent = this; 3213 3214 Value *value = rhs.loadValue(); 3215 storeValue(value); 3216 } 3217 3218 UShort4::UShort4(const Reference<Short4> &rhs) 3219 { 3220 // xyzw.parent = this; 3221 3222 Value *value = rhs.loadValue(); 3223 storeValue(value); 3224 } 3225 3226 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) const 3227 { 3228 storeValue(rhs.value); 3229 3230 return rhs; 3231 } 3232 3233 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const 3234 { 3235 Value *value = rhs.loadValue(); 3236 storeValue(value); 3237 3238 return RValue<UShort4>(value); 3239 } 3240 3241 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) const 3242 { 3243 Value *value = rhs.loadValue(); 3244 storeValue(value); 3245 3246 return RValue<UShort4>(value); 3247 } 3248 3249 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) const 3250 { 3251 storeValue(rhs.value); 3252 3253 return RValue<UShort4>(rhs); 3254 } 3255 3256 RValue<UShort4> UShort4::operator=(const Short4 &rhs) const 3257 { 3258 Value *value = rhs.loadValue(); 3259 storeValue(value); 3260 3261 return RValue<UShort4>(value); 3262 } 3263 3264 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) const 3265 { 3266 Value *value = rhs.loadValue(); 3267 storeValue(value); 3268 3269 return 
RValue<UShort4>(value); 3270 } 3271 3272 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs) 3273 { 3274 if(CPUID::supportsMMX2()) 3275 { 3276 return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs))); 3277 } 3278 else 3279 { 3280 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value)); 3281 } 3282 } 3283 3284 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs) 3285 { 3286 if(CPUID::supportsMMX2()) 3287 { 3288 return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs))); 3289 } 3290 else 3291 { 3292 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value)); 3293 } 3294 } 3295 3296 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs) 3297 { 3298 if(CPUID::supportsMMX2()) 3299 { 3300 return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs))); 3301 } 3302 else 3303 { 3304 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value)); 3305 } 3306 } 3307 3308 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs) 3309 { 3310 if(CPUID::supportsMMX2()) 3311 { 3312 return As<UShort4>(x86::pand(As<Short4>(lhs), As<Short4>(rhs))); 3313 } 3314 else 3315 { 3316 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value)); 3317 } 3318 } 3319 3320 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs) 3321 { 3322 if(CPUID::supportsMMX2()) 3323 { 3324 return As<UShort4>(x86::por(As<Short4>(lhs), As<Short4>(rhs))); 3325 } 3326 else 3327 { 3328 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value)); 3329 } 3330 } 3331 3332 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs) 3333 { 3334 if(CPUID::supportsMMX2()) 3335 { 3336 return As<UShort4>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs))); 3337 } 3338 else 3339 { 3340 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value)); 3341 } 3342 } 3343 3344 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs) 3345 { 3346 // return RValue<Short4>(Nucleus::createShl(lhs.value, 
rhs.value)); 3347 3348 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs)); 3349 } 3350 3351 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs) 3352 { 3353 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value)); 3354 3355 return x86::psrlw(lhs, rhs); 3356 } 3357 3358 RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs) 3359 { 3360 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value)); 3361 3362 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs)); 3363 } 3364 3365 RValue<UShort4> operator>>(RValue<UShort4> lhs, RValue<Long1> rhs) 3366 { 3367 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value)); 3368 3369 return x86::psrlw(lhs, rhs); 3370 } 3371 3372 RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs) 3373 { 3374 return lhs = lhs << rhs; 3375 } 3376 3377 RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs) 3378 { 3379 return lhs = lhs >> rhs; 3380 } 3381 3382 RValue<UShort4> operator<<=(const UShort4 &lhs, RValue<Long1> rhs) 3383 { 3384 return lhs = lhs << rhs; 3385 } 3386 3387 RValue<UShort4> operator>>=(const UShort4 &lhs, RValue<Long1> rhs) 3388 { 3389 return lhs = lhs >> rhs; 3390 } 3391 3392 RValue<UShort4> operator~(RValue<UShort4> val) 3393 { 3394 if(CPUID::supportsMMX2()) 3395 { 3396 return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu)); 3397 } 3398 else 3399 { 3400 return RValue<UShort4>(Nucleus::createNot(val.value)); 3401 } 3402 } 3403 3404 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y) 3405 { 3406 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3407 } 3408 3409 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y) 3410 { 3411 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) 
+ Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)); 3412 } 3413 3414 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y) 3415 { 3416 return x86::paddusw(x, y); 3417 } 3418 3419 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y) 3420 { 3421 return x86::psubusw(x, y); 3422 } 3423 3424 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y) 3425 { 3426 return x86::pmulhuw(x, y); 3427 } 3428 3429 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y) 3430 { 3431 return x86::pavgw(x, y); 3432 } 3433 3434 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y) 3435 { 3436 return x86::packuswb(x, y); 3437 } 3438 3439 Type *UShort4::getType() 3440 { 3441 if(CPUID::supportsMMX2()) 3442 { 3443 return MMX::getType(); 3444 } 3445 else 3446 { 3447 return T(VectorType::get(UShort::getType(), 4)); 3448 } 3449 } 3450 3451 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7) 3452 { 3453 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3454 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3455 } 3456 3457 Short8::Short8(RValue<Short8> rhs) 3458 { 3459 storeValue(rhs.value); 3460 } 3461 3462 Short8::Short8(const Reference<Short8> &rhs) 3463 { 3464 Value *value = rhs.loadValue(); 3465 storeValue(value); 3466 } 3467 3468 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi) 3469 { 3470 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); 3471 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); 3472 3473 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2))); 3474 long2 = Nucleus::createInsertElement(long2, loLong, 0); 3475 long2 = Nucleus::createInsertElement(long2, hiLong, 1); 3476 Value *short8 = Nucleus::createBitCast(long2, Short8::getType()); 3477 3478 storeValue(short8); 3479 } 3480 3481 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs) 3482 { 3483 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value)); 3484 } 
3485 3486 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs) 3487 { 3488 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value)); 3489 } 3490 3491 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs) 3492 { 3493 return x86::psllw(lhs, rhs); // FIXME: Fallback required 3494 } 3495 3496 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs) 3497 { 3498 return x86::psraw(lhs, rhs); // FIXME: Fallback required 3499 } 3500 3501 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y) 3502 { 3503 return x86::pmaddwd(x, y); // FIXME: Fallback required 3504 } 3505 3506 RValue<Int4> Abs(RValue<Int4> x) 3507 { 3508 if(CPUID::supportsSSSE3()) 3509 { 3510 return x86::pabsd(x); 3511 } 3512 else 3513 { 3514 Int4 mask = (x >> 31); 3515 return (mask ^ x) - mask; 3516 } 3517 } 3518 3519 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y) 3520 { 3521 return x86::pmulhw(x, y); // FIXME: Fallback required 3522 } 3523 3524 Type *Short8::getType() 3525 { 3526 return T(VectorType::get(Short::getType(), 8)); 3527 } 3528 3529 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7) 3530 { 3531 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7}; 3532 storeValue(Nucleus::createConstantVector(constantVector, getType())); 3533 } 3534 3535 UShort8::UShort8(RValue<UShort8> rhs) 3536 { 3537 storeValue(rhs.value); 3538 } 3539 3540 UShort8::UShort8(const Reference<UShort8> &rhs) 3541 { 3542 Value *value = rhs.loadValue(); 3543 storeValue(value); 3544 } 3545 3546 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi) 3547 { 3548 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType()); 3549 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType()); 3550 3551 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2))); 3552 long2 = Nucleus::createInsertElement(long2, loLong, 0); 3553 long2 = 
Nucleus::createInsertElement(long2, hiLong, 1); 3554 Value *short8 = Nucleus::createBitCast(long2, Short8::getType()); 3555 3556 storeValue(short8); 3557 } 3558 3559 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) const 3560 { 3561 storeValue(rhs.value); 3562 3563 return rhs; 3564 } 3565 3566 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const 3567 { 3568 Value *value = rhs.loadValue(); 3569 storeValue(value); 3570 3571 return RValue<UShort8>(value); 3572 } 3573 3574 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) const 3575 { 3576 Value *value = rhs.loadValue(); 3577 storeValue(value); 3578 3579 return RValue<UShort8>(value); 3580 } 3581 3582 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs) 3583 { 3584 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value)); 3585 } 3586 3587 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs) 3588 { 3589 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required 3590 } 3591 3592 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs) 3593 { 3594 return x86::psrlw(lhs, rhs); // FIXME: Fallback required 3595 } 3596 3597 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs) 3598 { 3599 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value)); 3600 } 3601 3602 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs) 3603 { 3604 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value)); 3605 } 3606 3607 RValue<UShort8> operator+=(const UShort8 &lhs, RValue<UShort8> rhs) 3608 { 3609 return lhs = lhs + rhs; 3610 } 3611 3612 RValue<UShort8> operator~(RValue<UShort8> val) 3613 { 3614 return RValue<UShort8>(Nucleus::createNot(val.value)); 3615 } 3616 3617 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7) 3618 { 3619 int pshufb[16] = 3620 { 3621 select0 + 0, 3622 select0 + 1, 3623 select1 
+ 0, 3624 select1 + 1, 3625 select2 + 0, 3626 select2 + 1, 3627 select3 + 0, 3628 select3 + 1, 3629 select4 + 0, 3630 select4 + 1, 3631 select5 + 0, 3632 select5 + 1, 3633 select6 + 0, 3634 select6 + 1, 3635 select7 + 0, 3636 select7 + 1, 3637 }; 3638 3639 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType()); 3640 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb); 3641 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType()); 3642 3643 return RValue<UShort8>(short8); 3644 } 3645 3646 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y) 3647 { 3648 return x86::pmulhuw(x, y); // FIXME: Fallback required 3649 } 3650 3651 Type *UShort8::getType() 3652 { 3653 return T(VectorType::get(UShort::getType(), 8)); 3654 } 3655 3656 Int::Int(Argument<Int> argument) 3657 { 3658 storeValue(argument.value); 3659 } 3660 3661 Int::Int(RValue<Byte> cast) 3662 { 3663 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3664 3665 storeValue(integer); 3666 } 3667 3668 Int::Int(RValue<SByte> cast) 3669 { 3670 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3671 3672 storeValue(integer); 3673 } 3674 3675 Int::Int(RValue<Short> cast) 3676 { 3677 Value *integer = Nucleus::createSExt(cast.value, Int::getType()); 3678 3679 storeValue(integer); 3680 } 3681 3682 Int::Int(RValue<UShort> cast) 3683 { 3684 Value *integer = Nucleus::createZExt(cast.value, Int::getType()); 3685 3686 storeValue(integer); 3687 } 3688 3689 Int::Int(RValue<Int2> cast) 3690 { 3691 *this = Extract(cast, 0); 3692 } 3693 3694 Int::Int(RValue<Long> cast) 3695 { 3696 Value *integer = Nucleus::createTrunc(cast.value, Int::getType()); 3697 3698 storeValue(integer); 3699 } 3700 3701 Int::Int(RValue<Float> cast) 3702 { 3703 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType()); 3704 3705 storeValue(integer); 3706 } 3707 3708 Int::Int() 3709 { 3710 } 3711 3712 Int::Int(int x) 3713 { 3714 storeValue(Nucleus::createConstantInt(x)); 3715 
} 3716 3717 Int::Int(RValue<Int> rhs) 3718 { 3719 storeValue(rhs.value); 3720 } 3721 3722 Int::Int(RValue<UInt> rhs) 3723 { 3724 storeValue(rhs.value); 3725 } 3726 3727 Int::Int(const Int &rhs) 3728 { 3729 Value *value = rhs.loadValue(); 3730 storeValue(value); 3731 } 3732 3733 Int::Int(const Reference<Int> &rhs) 3734 { 3735 Value *value = rhs.loadValue(); 3736 storeValue(value); 3737 } 3738 3739 Int::Int(const UInt &rhs) 3740 { 3741 Value *value = rhs.loadValue(); 3742 storeValue(value); 3743 } 3744 3745 Int::Int(const Reference<UInt> &rhs) 3746 { 3747 Value *value = rhs.loadValue(); 3748 storeValue(value); 3749 } 3750 3751 RValue<Int> Int::operator=(int rhs) const 3752 { 3753 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs))); 3754 } 3755 3756 RValue<Int> Int::operator=(RValue<Int> rhs) const 3757 { 3758 storeValue(rhs.value); 3759 3760 return rhs; 3761 } 3762 3763 RValue<Int> Int::operator=(RValue<UInt> rhs) const 3764 { 3765 storeValue(rhs.value); 3766 3767 return RValue<Int>(rhs); 3768 } 3769 3770 RValue<Int> Int::operator=(const Int &rhs) const 3771 { 3772 Value *value = rhs.loadValue(); 3773 storeValue(value); 3774 3775 return RValue<Int>(value); 3776 } 3777 3778 RValue<Int> Int::operator=(const Reference<Int> &rhs) const 3779 { 3780 Value *value = rhs.loadValue(); 3781 storeValue(value); 3782 3783 return RValue<Int>(value); 3784 } 3785 3786 RValue<Int> Int::operator=(const UInt &rhs) const 3787 { 3788 Value *value = rhs.loadValue(); 3789 storeValue(value); 3790 3791 return RValue<Int>(value); 3792 } 3793 3794 RValue<Int> Int::operator=(const Reference<UInt> &rhs) const 3795 { 3796 Value *value = rhs.loadValue(); 3797 storeValue(value); 3798 3799 return RValue<Int>(value); 3800 } 3801 3802 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs) 3803 { 3804 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value)); 3805 } 3806 3807 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs) 3808 { 3809 return 
RValue<Int>(Nucleus::createSub(lhs.value, rhs.value)); 3810 } 3811 3812 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs) 3813 { 3814 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value)); 3815 } 3816 3817 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs) 3818 { 3819 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value)); 3820 } 3821 3822 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs) 3823 { 3824 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value)); 3825 } 3826 3827 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs) 3828 { 3829 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value)); 3830 } 3831 3832 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs) 3833 { 3834 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value)); 3835 } 3836 3837 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs) 3838 { 3839 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value)); 3840 } 3841 3842 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs) 3843 { 3844 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value)); 3845 } 3846 3847 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs) 3848 { 3849 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value)); 3850 } 3851 3852 RValue<Int> operator+=(const Int &lhs, RValue<Int> rhs) 3853 { 3854 return lhs = lhs + rhs; 3855 } 3856 3857 RValue<Int> operator-=(const Int &lhs, RValue<Int> rhs) 3858 { 3859 return lhs = lhs - rhs; 3860 } 3861 3862 RValue<Int> operator*=(const Int &lhs, RValue<Int> rhs) 3863 { 3864 return lhs = lhs * rhs; 3865 } 3866 3867 RValue<Int> operator/=(const Int &lhs, RValue<Int> rhs) 3868 { 3869 return lhs = lhs / rhs; 3870 } 3871 3872 RValue<Int> operator%=(const Int &lhs, RValue<Int> rhs) 3873 { 3874 return lhs = lhs % rhs; 3875 } 3876 3877 RValue<Int> operator&=(const Int &lhs, RValue<Int> rhs) 3878 { 3879 return lhs = lhs & rhs; 3880 } 3881 3882 RValue<Int> operator|=(const Int &lhs, RValue<Int> rhs) 3883 { 3884 return lhs = lhs | rhs; 
3885 } 3886 3887 RValue<Int> operator^=(const Int &lhs, RValue<Int> rhs) 3888 { 3889 return lhs = lhs ^ rhs; 3890 } 3891 3892 RValue<Int> operator<<=(const Int &lhs, RValue<Int> rhs) 3893 { 3894 return lhs = lhs << rhs; 3895 } 3896 3897 RValue<Int> operator>>=(const Int &lhs, RValue<Int> rhs) 3898 { 3899 return lhs = lhs >> rhs; 3900 } 3901 3902 RValue<Int> operator+(RValue<Int> val) 3903 { 3904 return val; 3905 } 3906 3907 RValue<Int> operator-(RValue<Int> val) 3908 { 3909 return RValue<Int>(Nucleus::createNeg(val.value)); 3910 } 3911 3912 RValue<Int> operator~(RValue<Int> val) 3913 { 3914 return RValue<Int>(Nucleus::createNot(val.value)); 3915 } 3916 3917 RValue<Int> operator++(const Int &val, int) // Post-increment 3918 { 3919 RValue<Int> res = val; 3920 3921 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1))); 3922 val.storeValue(inc); 3923 3924 return res; 3925 } 3926 3927 const Int &operator++(const Int &val) // Pre-increment 3928 { 3929 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1))); 3930 val.storeValue(inc); 3931 3932 return val; 3933 } 3934 3935 RValue<Int> operator--(const Int &val, int) // Post-decrement 3936 { 3937 RValue<Int> res = val; 3938 3939 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1))); 3940 val.storeValue(inc); 3941 3942 return res; 3943 } 3944 3945 const Int &operator--(const Int &val) // Pre-decrement 3946 { 3947 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1))); 3948 val.storeValue(inc); 3949 3950 return val; 3951 } 3952 3953 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs) 3954 { 3955 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value)); 3956 } 3957 3958 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs) 3959 { 3960 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value)); 3961 } 3962 3963 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs) 3964 { 3965 return 
RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value)); 3966 } 3967 3968 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs) 3969 { 3970 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value)); 3971 } 3972 3973 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs) 3974 { 3975 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 3976 } 3977 3978 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs) 3979 { 3980 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 3981 } 3982 3983 RValue<Int> Max(RValue<Int> x, RValue<Int> y) 3984 { 3985 return IfThenElse(x > y, x, y); 3986 } 3987 3988 RValue<Int> Min(RValue<Int> x, RValue<Int> y) 3989 { 3990 return IfThenElse(x < y, x, y); 3991 } 3992 3993 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max) 3994 { 3995 return Min(Max(x, min), max); 3996 } 3997 3998 RValue<Int> RoundInt(RValue<Float> cast) 3999 { 4000 return x86::cvtss2si(cast); 4001 4002 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 4003 } 4004 4005 Type *Int::getType() 4006 { 4007 return T(llvm::Type::getInt32Ty(*::context)); 4008 } 4009 4010 Long::Long(RValue<Int> cast) 4011 { 4012 Value *integer = Nucleus::createSExt(cast.value, Long::getType()); 4013 4014 storeValue(integer); 4015 } 4016 4017 Long::Long(RValue<UInt> cast) 4018 { 4019 Value *integer = Nucleus::createZExt(cast.value, Long::getType()); 4020 4021 storeValue(integer); 4022 } 4023 4024 Long::Long() 4025 { 4026 } 4027 4028 Long::Long(RValue<Long> rhs) 4029 { 4030 storeValue(rhs.value); 4031 } 4032 4033 RValue<Long> Long::operator=(int64_t rhs) const 4034 { 4035 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs))); 4036 } 4037 4038 RValue<Long> Long::operator=(RValue<Long> rhs) const 4039 { 4040 storeValue(rhs.value); 4041 4042 return rhs; 4043 } 4044 4045 RValue<Long> Long::operator=(const Long &rhs) const 4046 { 4047 Value *value = rhs.loadValue(); 4048 storeValue(value); 4049 4050 return 
RValue<Long>(value); 4051 } 4052 4053 RValue<Long> Long::operator=(const Reference<Long> &rhs) const 4054 { 4055 Value *value = rhs.loadValue(); 4056 storeValue(value); 4057 4058 return RValue<Long>(value); 4059 } 4060 4061 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs) 4062 { 4063 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value)); 4064 } 4065 4066 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs) 4067 { 4068 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value)); 4069 } 4070 4071 RValue<Long> operator+=(const Long &lhs, RValue<Long> rhs) 4072 { 4073 return lhs = lhs + rhs; 4074 } 4075 4076 RValue<Long> operator-=(const Long &lhs, RValue<Long> rhs) 4077 { 4078 return lhs = lhs - rhs; 4079 } 4080 4081 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y) 4082 { 4083 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value)); 4084 } 4085 4086 Type *Long::getType() 4087 { 4088 return T(llvm::Type::getInt64Ty(*::context)); 4089 } 4090 4091 Long1::Long1(const RValue<UInt> cast) 4092 { 4093 Value *undefCast = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), cast.value, 0); 4094 Value *zeroCast = Nucleus::createInsertElement(undefCast, V(Nucleus::createConstantInt(0)), 1); 4095 4096 storeValue(Nucleus::createBitCast(zeroCast, Long1::getType())); 4097 } 4098 4099 Long1::Long1(RValue<Long1> rhs) 4100 { 4101 storeValue(rhs.value); 4102 } 4103 4104 Type *Long1::getType() 4105 { 4106 if(CPUID::supportsMMX2()) 4107 { 4108 return MMX::getType(); 4109 } 4110 else 4111 { 4112 return T(VectorType::get(Long::getType(), 1)); 4113 } 4114 } 4115 4116 UInt::UInt(Argument<UInt> argument) 4117 { 4118 storeValue(argument.value); 4119 } 4120 4121 UInt::UInt(RValue<UShort> cast) 4122 { 4123 Value *integer = Nucleus::createZExt(cast.value, UInt::getType()); 4124 4125 storeValue(integer); 4126 } 4127 4128 UInt::UInt(RValue<Long> cast) 4129 { 4130 Value *integer = Nucleus::createTrunc(cast.value, 
UInt::getType()); 4131 4132 storeValue(integer); 4133 } 4134 4135 UInt::UInt(RValue<Float> cast) 4136 { 4137 // Note: createFPToUI is broken, must perform conversion using createFPtoSI 4138 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType()); 4139 4140 // Smallest positive value representable in UInt, but not in Int 4141 const unsigned int ustart = 0x80000000u; 4142 const float ustartf = float(ustart); 4143 4144 // If the value is negative, store 0, otherwise store the result of the conversion 4145 storeValue((~(As<Int>(cast) >> 31) & 4146 // Check if the value can be represented as an Int 4147 IfThenElse(cast >= ustartf, 4148 // If the value is too large, subtract ustart and re-add it after conversion. 4149 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)), 4150 // Otherwise, just convert normally 4151 Int(cast))).value); 4152 } 4153 4154 UInt::UInt() 4155 { 4156 } 4157 4158 UInt::UInt(int x) 4159 { 4160 storeValue(Nucleus::createConstantInt(x)); 4161 } 4162 4163 UInt::UInt(unsigned int x) 4164 { 4165 storeValue(Nucleus::createConstantInt(x)); 4166 } 4167 4168 UInt::UInt(RValue<UInt> rhs) 4169 { 4170 storeValue(rhs.value); 4171 } 4172 4173 UInt::UInt(RValue<Int> rhs) 4174 { 4175 storeValue(rhs.value); 4176 } 4177 4178 UInt::UInt(const UInt &rhs) 4179 { 4180 Value *value = rhs.loadValue(); 4181 storeValue(value); 4182 } 4183 4184 UInt::UInt(const Reference<UInt> &rhs) 4185 { 4186 Value *value = rhs.loadValue(); 4187 storeValue(value); 4188 } 4189 4190 UInt::UInt(const Int &rhs) 4191 { 4192 Value *value = rhs.loadValue(); 4193 storeValue(value); 4194 } 4195 4196 UInt::UInt(const Reference<Int> &rhs) 4197 { 4198 Value *value = rhs.loadValue(); 4199 storeValue(value); 4200 } 4201 4202 RValue<UInt> UInt::operator=(unsigned int rhs) const 4203 { 4204 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs))); 4205 } 4206 4207 RValue<UInt> UInt::operator=(RValue<UInt> rhs) const 4208 { 4209 storeValue(rhs.value); 4210 4211 return rhs; 
4212 } 4213 4214 RValue<UInt> UInt::operator=(RValue<Int> rhs) const 4215 { 4216 storeValue(rhs.value); 4217 4218 return RValue<UInt>(rhs); 4219 } 4220 4221 RValue<UInt> UInt::operator=(const UInt &rhs) const 4222 { 4223 Value *value = rhs.loadValue(); 4224 storeValue(value); 4225 4226 return RValue<UInt>(value); 4227 } 4228 4229 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) const 4230 { 4231 Value *value = rhs.loadValue(); 4232 storeValue(value); 4233 4234 return RValue<UInt>(value); 4235 } 4236 4237 RValue<UInt> UInt::operator=(const Int &rhs) const 4238 { 4239 Value *value = rhs.loadValue(); 4240 storeValue(value); 4241 4242 return RValue<UInt>(value); 4243 } 4244 4245 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) const 4246 { 4247 Value *value = rhs.loadValue(); 4248 storeValue(value); 4249 4250 return RValue<UInt>(value); 4251 } 4252 4253 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs) 4254 { 4255 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value)); 4256 } 4257 4258 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs) 4259 { 4260 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value)); 4261 } 4262 4263 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs) 4264 { 4265 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value)); 4266 } 4267 4268 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs) 4269 { 4270 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value)); 4271 } 4272 4273 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs) 4274 { 4275 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value)); 4276 } 4277 4278 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs) 4279 { 4280 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value)); 4281 } 4282 4283 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs) 4284 { 4285 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value)); 4286 } 4287 4288 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs) 4289 
{ 4290 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value)); 4291 } 4292 4293 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs) 4294 { 4295 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value)); 4296 } 4297 4298 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs) 4299 { 4300 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value)); 4301 } 4302 4303 RValue<UInt> operator+=(const UInt &lhs, RValue<UInt> rhs) 4304 { 4305 return lhs = lhs + rhs; 4306 } 4307 4308 RValue<UInt> operator-=(const UInt &lhs, RValue<UInt> rhs) 4309 { 4310 return lhs = lhs - rhs; 4311 } 4312 4313 RValue<UInt> operator*=(const UInt &lhs, RValue<UInt> rhs) 4314 { 4315 return lhs = lhs * rhs; 4316 } 4317 4318 RValue<UInt> operator/=(const UInt &lhs, RValue<UInt> rhs) 4319 { 4320 return lhs = lhs / rhs; 4321 } 4322 4323 RValue<UInt> operator%=(const UInt &lhs, RValue<UInt> rhs) 4324 { 4325 return lhs = lhs % rhs; 4326 } 4327 4328 RValue<UInt> operator&=(const UInt &lhs, RValue<UInt> rhs) 4329 { 4330 return lhs = lhs & rhs; 4331 } 4332 4333 RValue<UInt> operator|=(const UInt &lhs, RValue<UInt> rhs) 4334 { 4335 return lhs = lhs | rhs; 4336 } 4337 4338 RValue<UInt> operator^=(const UInt &lhs, RValue<UInt> rhs) 4339 { 4340 return lhs = lhs ^ rhs; 4341 } 4342 4343 RValue<UInt> operator<<=(const UInt &lhs, RValue<UInt> rhs) 4344 { 4345 return lhs = lhs << rhs; 4346 } 4347 4348 RValue<UInt> operator>>=(const UInt &lhs, RValue<UInt> rhs) 4349 { 4350 return lhs = lhs >> rhs; 4351 } 4352 4353 RValue<UInt> operator+(RValue<UInt> val) 4354 { 4355 return val; 4356 } 4357 4358 RValue<UInt> operator-(RValue<UInt> val) 4359 { 4360 return RValue<UInt>(Nucleus::createNeg(val.value)); 4361 } 4362 4363 RValue<UInt> operator~(RValue<UInt> val) 4364 { 4365 return RValue<UInt>(Nucleus::createNot(val.value)); 4366 } 4367 4368 RValue<UInt> operator++(const UInt &val, int) // Post-increment 4369 { 4370 RValue<UInt> res = val; 4371 4372 Value *inc = Nucleus::createAdd(res.value, 
V(Nucleus::createConstantInt(1))); 4373 val.storeValue(inc); 4374 4375 return res; 4376 } 4377 4378 const UInt &operator++(const UInt &val) // Pre-increment 4379 { 4380 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1))); 4381 val.storeValue(inc); 4382 4383 return val; 4384 } 4385 4386 RValue<UInt> operator--(const UInt &val, int) // Post-decrement 4387 { 4388 RValue<UInt> res = val; 4389 4390 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1))); 4391 val.storeValue(inc); 4392 4393 return res; 4394 } 4395 4396 const UInt &operator--(const UInt &val) // Pre-decrement 4397 { 4398 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1))); 4399 val.storeValue(inc); 4400 4401 return val; 4402 } 4403 4404 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y) 4405 { 4406 return IfThenElse(x > y, x, y); 4407 } 4408 4409 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y) 4410 { 4411 return IfThenElse(x < y, x, y); 4412 } 4413 4414 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max) 4415 { 4416 return Min(Max(x, min), max); 4417 } 4418 4419 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs) 4420 { 4421 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value)); 4422 } 4423 4424 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs) 4425 { 4426 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value)); 4427 } 4428 4429 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs) 4430 { 4431 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value)); 4432 } 4433 4434 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs) 4435 { 4436 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value)); 4437 } 4438 4439 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs) 4440 { 4441 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value)); 4442 } 4443 4444 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs) 4445 { 4446 return 
RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value)); 4447 } 4448 4449// RValue<UInt> RoundUInt(RValue<Float> cast) 4450// { 4451// return x86::cvtss2si(val); // FIXME: Unsigned 4452// 4453// // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f)); 4454// } 4455 4456 Type *UInt::getType() 4457 { 4458 return T(llvm::Type::getInt32Ty(*::context)); 4459 } 4460 4461// Int2::Int2(RValue<Int> cast) 4462// { 4463// Value *extend = Nucleus::createZExt(cast.value, Long::getType()); 4464// Value *vector = Nucleus::createBitCast(extend, Int2::getType()); 4465// 4466// int shuffle[2] = {0, 0}; 4467// Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle); 4468// 4469// storeValue(replicate); 4470// } 4471 4472 Int2::Int2(RValue<Int4> cast) 4473 { 4474 Value *long2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2))); 4475 Value *element = Nucleus::createExtractElement(long2, Long::getType(), 0); 4476 Value *int2 = Nucleus::createBitCast(element, Int2::getType()); 4477 4478 storeValue(int2); 4479 } 4480 4481 Int2::Int2() 4482 { 4483 // xy.parent = this; 4484 } 4485 4486 Int2::Int2(int x, int y) 4487 { 4488 // xy.parent = this; 4489 4490 int64_t constantVector[2] = {x, y}; 4491 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Int::getType(), 2)))); 4492 4493 storeValue(Nucleus::createBitCast(vector, getType())); 4494 } 4495 4496 Int2::Int2(RValue<Int2> rhs) 4497 { 4498 // xy.parent = this; 4499 4500 storeValue(rhs.value); 4501 } 4502 4503 Int2::Int2(const Int2 &rhs) 4504 { 4505 // xy.parent = this; 4506 4507 Value *value = rhs.loadValue(); 4508 storeValue(value); 4509 } 4510 4511 Int2::Int2(const Reference<Int2> &rhs) 4512 { 4513 // xy.parent = this; 4514 4515 Value *value = rhs.loadValue(); 4516 storeValue(value); 4517 } 4518 4519 Int2::Int2(RValue<Int> lo, RValue<Int> hi) 4520 { 4521 if(CPUID::supportsMMX2()) 4522 { 4523 // movd mm0, lo 4524 // movd mm1, hi 4525 // punpckldq mm0, mm1 
4526 storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value); 4527 } 4528 else 4529 { 4530 int shuffle[2] = {0, 1}; 4531 Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, T(VectorType::get(Int::getType(), 1))), Nucleus::createBitCast(hi.value, T(VectorType::get(Int::getType(), 1))), shuffle); 4532 4533 storeValue(Nucleus::createBitCast(packed, Int2::getType())); 4534 } 4535 } 4536 4537 RValue<Int2> Int2::operator=(RValue<Int2> rhs) const 4538 { 4539 storeValue(rhs.value); 4540 4541 return rhs; 4542 } 4543 4544 RValue<Int2> Int2::operator=(const Int2 &rhs) const 4545 { 4546 Value *value = rhs.loadValue(); 4547 storeValue(value); 4548 4549 return RValue<Int2>(value); 4550 } 4551 4552 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) const 4553 { 4554 Value *value = rhs.loadValue(); 4555 storeValue(value); 4556 4557 return RValue<Int2>(value); 4558 } 4559 4560 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs) 4561 { 4562 if(CPUID::supportsMMX2()) 4563 { 4564 return x86::paddd(lhs, rhs); 4565 } 4566 else 4567 { 4568 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value)); 4569 } 4570 } 4571 4572 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs) 4573 { 4574 if(CPUID::supportsMMX2()) 4575 { 4576 return x86::psubd(lhs, rhs); 4577 } 4578 else 4579 { 4580 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value)); 4581 } 4582 } 4583 4584// RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs) 4585// { 4586// return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value)); 4587// } 4588 4589// RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs) 4590// { 4591// return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value)); 4592// } 4593 4594// RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs) 4595// { 4596// return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value)); 4597// } 4598 4599 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> 
rhs) 4600 { 4601 if(CPUID::supportsMMX2()) 4602 { 4603 return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs))); 4604 } 4605 else 4606 { 4607 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value)); 4608 } 4609 } 4610 4611 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs) 4612 { 4613 if(CPUID::supportsMMX2()) 4614 { 4615 return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs))); 4616 } 4617 else 4618 { 4619 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value)); 4620 } 4621 } 4622 4623 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs) 4624 { 4625 if(CPUID::supportsMMX2()) 4626 { 4627 return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs))); 4628 } 4629 else 4630 { 4631 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value)); 4632 } 4633 } 4634 4635 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs) 4636 { 4637 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value)); 4638 4639 return x86::pslld(lhs, rhs); 4640 } 4641 4642 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs) 4643 { 4644 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value)); 4645 4646 return x86::psrad(lhs, rhs); 4647 } 4648 4649 RValue<Int2> operator<<(RValue<Int2> lhs, RValue<Long1> rhs) 4650 { 4651 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value)); 4652 4653 return x86::pslld(lhs, rhs); 4654 } 4655 4656 RValue<Int2> operator>>(RValue<Int2> lhs, RValue<Long1> rhs) 4657 { 4658 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value)); 4659 4660 return x86::psrad(lhs, rhs); 4661 } 4662 4663 RValue<Int2> operator+=(const Int2 &lhs, RValue<Int2> rhs) 4664 { 4665 return lhs = lhs + rhs; 4666 } 4667 4668 RValue<Int2> operator-=(const Int2 &lhs, RValue<Int2> rhs) 4669 { 4670 return lhs = lhs - rhs; 4671 } 4672 4673// RValue<Int2> operator*=(const Int2 &lhs, RValue<Int2> rhs) 4674// { 4675// return lhs = lhs * rhs; 4676// } 4677 4678// RValue<Int2> operator/=(const Int2 &lhs, RValue<Int2> rhs) 4679// 
{ 4680// return lhs = lhs / rhs; 4681// } 4682 4683// RValue<Int2> operator%=(const Int2 &lhs, RValue<Int2> rhs) 4684// { 4685// return lhs = lhs % rhs; 4686// } 4687 4688 RValue<Int2> operator&=(const Int2 &lhs, RValue<Int2> rhs) 4689 { 4690 return lhs = lhs & rhs; 4691 } 4692 4693 RValue<Int2> operator|=(const Int2 &lhs, RValue<Int2> rhs) 4694 { 4695 return lhs = lhs | rhs; 4696 } 4697 4698 RValue<Int2> operator^=(const Int2 &lhs, RValue<Int2> rhs) 4699 { 4700 return lhs = lhs ^ rhs; 4701 } 4702 4703 RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs) 4704 { 4705 return lhs = lhs << rhs; 4706 } 4707 4708 RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs) 4709 { 4710 return lhs = lhs >> rhs; 4711 } 4712 4713 RValue<Int2> operator<<=(const Int2 &lhs, RValue<Long1> rhs) 4714 { 4715 return lhs = lhs << rhs; 4716 } 4717 4718 RValue<Int2> operator>>=(const Int2 &lhs, RValue<Long1> rhs) 4719 { 4720 return lhs = lhs >> rhs; 4721 } 4722 4723// RValue<Int2> operator+(RValue<Int2> val) 4724// { 4725// return val; 4726// } 4727 4728// RValue<Int2> operator-(RValue<Int2> val) 4729// { 4730// return RValue<Int2>(Nucleus::createNeg(val.value)); 4731// } 4732 4733 RValue<Int2> operator~(RValue<Int2> val) 4734 { 4735 if(CPUID::supportsMMX2()) 4736 { 4737 return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF); 4738 } 4739 else 4740 { 4741 return RValue<Int2>(Nucleus::createNot(val.value)); 4742 } 4743 } 4744 4745 RValue<Long1> UnpackLow(RValue<Int2> x, RValue<Int2> y) 4746 { 4747 if(CPUID::supportsMMX2()) 4748 { 4749 return x86::punpckldq(x, y); 4750 } 4751 else 4752 { 4753 int shuffle[2] = {0, 2}; 4754 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 4755 4756 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType())); 4757 } 4758 } 4759 4760 RValue<Long1> UnpackHigh(RValue<Int2> x, RValue<Int2> y) 4761 { 4762 if(CPUID::supportsMMX2()) 4763 { 4764 return x86::punpckhdq(x, y); 4765 } 4766 else 4767 { 4768 int shuffle[2] = {1, 3}; 4769 
Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle); 4770 4771 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType())); 4772 } 4773 } 4774 4775 RValue<Int> Extract(RValue<Int2> val, int i) 4776 { 4777 if(false) // FIXME: LLVM does not generate optimal code 4778 { 4779 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i)); 4780 } 4781 else 4782 { 4783 if(i == 0) 4784 { 4785 return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), Int::getType(), 0)); 4786 } 4787 else 4788 { 4789 Int2 val2 = As<Int2>(UnpackHigh(val, val)); 4790 4791 return Extract(val2, 0); 4792 } 4793 } 4794 } 4795 4796 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i) 4797 { 4798 return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), element.value, i), Int2::getType())); 4799 } 4800 4801 Type *Int2::getType() 4802 { 4803 if(CPUID::supportsMMX2()) 4804 { 4805 return MMX::getType(); 4806 } 4807 else 4808 { 4809 return T(VectorType::get(Int::getType(), 2)); 4810 } 4811 } 4812 4813 UInt2::UInt2() 4814 { 4815 // xy.parent = this; 4816 } 4817 4818 UInt2::UInt2(unsigned int x, unsigned int y) 4819 { 4820 // xy.parent = this; 4821 4822 int64_t constantVector[2] = {x, y}; 4823 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UInt::getType(), 2)))); 4824 4825 storeValue(Nucleus::createBitCast(vector, getType())); 4826 } 4827 4828 UInt2::UInt2(RValue<UInt2> rhs) 4829 { 4830 // xy.parent = this; 4831 4832 storeValue(rhs.value); 4833 } 4834 4835 UInt2::UInt2(const UInt2 &rhs) 4836 { 4837 // xy.parent = this; 4838 4839 Value *value = rhs.loadValue(); 4840 storeValue(value); 4841 } 4842 4843 UInt2::UInt2(const Reference<UInt2> &rhs) 4844 { 4845 // xy.parent = this; 4846 4847 Value *value = rhs.loadValue(); 4848 storeValue(value); 4849 } 4850 4851 
// UInt2 assignment: store the incoming value and return it.
	RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) const
	{
		storeValue(rhs.value);

		return rhs;
	}

	// Assignment from a variable or reference: load once, store, return the loaded value.

	RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const
	{
		Value *loaded = rhs.loadValue();
		storeValue(loaded);

		return RValue<UInt2>(loaded);
	}

	RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) const
	{
		Value *loaded = rhs.loadValue();
		storeValue(loaded);

		return RValue<UInt2>(loaded);
	}

	// With MMX2 support the operands are reinterpreted as Int2 for x86::paddd;
	// otherwise Nucleus::createAdd is used.
	RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
	{
		if(CPUID::supportsMMX2())
		{
			return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
		}

		return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
	}

	// With MMX2 support the operands are reinterpreted as Int2 for x86::psubd;
	// otherwise Nucleus::createSub is used.
	RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
	{
		if(CPUID::supportsMMX2())
		{
			return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
		}

		return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
	}

//	RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
//	{
//		return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
//	}

//	RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
//	{
//		return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
//	}

//	RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
//	{
//		return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
//	}

	// x86::pand on Short4 views with MMX2 support, otherwise Nucleus::createAnd.
	RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
	{
		if(CPUID::supportsMMX2())
		{
			return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
		}

		return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
	}

	RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
	{
		if(CPUID::supportsMMX2())
		{
			return
As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs))); 4930 } 4931 else 4932 { 4933 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value)); 4934 } 4935 } 4936 4937 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs) 4938 { 4939 if(CPUID::supportsMMX2()) 4940 { 4941 return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs))); 4942 } 4943 else 4944 { 4945 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value)); 4946 } 4947 } 4948 4949 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs) 4950 { 4951 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value)); 4952 4953 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs)); 4954 } 4955 4956 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs) 4957 { 4958 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value)); 4959 4960 return x86::psrld(lhs, rhs); 4961 } 4962 4963 RValue<UInt2> operator<<(RValue<UInt2> lhs, RValue<Long1> rhs) 4964 { 4965 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value)); 4966 4967 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs)); 4968 } 4969 4970 RValue<UInt2> operator>>(RValue<UInt2> lhs, RValue<Long1> rhs) 4971 { 4972 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value)); 4973 4974 return x86::psrld(lhs, rhs); 4975 } 4976 4977 RValue<UInt2> operator+=(const UInt2 &lhs, RValue<UInt2> rhs) 4978 { 4979 return lhs = lhs + rhs; 4980 } 4981 4982 RValue<UInt2> operator-=(const UInt2 &lhs, RValue<UInt2> rhs) 4983 { 4984 return lhs = lhs - rhs; 4985 } 4986 4987// RValue<UInt2> operator*=(const UInt2 &lhs, RValue<UInt2> rhs) 4988// { 4989// return lhs = lhs * rhs; 4990// } 4991 4992// RValue<UInt2> operator/=(const UInt2 &lhs, RValue<UInt2> rhs) 4993// { 4994// return lhs = lhs / rhs; 4995// } 4996 4997// RValue<UInt2> operator%=(const UInt2 &lhs, RValue<UInt2> rhs) 4998// { 4999// return lhs = lhs % rhs; 5000// } 5001 5002 RValue<UInt2> operator&=(const UInt2 &lhs, RValue<UInt2> rhs) 5003 { 5004 return lhs = lhs & rhs; 
5005 } 5006 5007 RValue<UInt2> operator|=(const UInt2 &lhs, RValue<UInt2> rhs) 5008 { 5009 return lhs = lhs | rhs; 5010 } 5011 5012 RValue<UInt2> operator^=(const UInt2 &lhs, RValue<UInt2> rhs) 5013 { 5014 return lhs = lhs ^ rhs; 5015 } 5016 5017 RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs) 5018 { 5019 return lhs = lhs << rhs; 5020 } 5021 5022 RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs) 5023 { 5024 return lhs = lhs >> rhs; 5025 } 5026 5027 RValue<UInt2> operator<<=(const UInt2 &lhs, RValue<Long1> rhs) 5028 { 5029 return lhs = lhs << rhs; 5030 } 5031 5032 RValue<UInt2> operator>>=(const UInt2 &lhs, RValue<Long1> rhs) 5033 { 5034 return lhs = lhs >> rhs; 5035 } 5036 5037// RValue<UInt2> operator+(RValue<UInt2> val) 5038// { 5039// return val; 5040// } 5041 5042// RValue<UInt2> operator-(RValue<UInt2> val) 5043// { 5044// return RValue<UInt2>(Nucleus::createNeg(val.value)); 5045// } 5046 5047 RValue<UInt2> operator~(RValue<UInt2> val) 5048 { 5049 if(CPUID::supportsMMX2()) 5050 { 5051 return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF); 5052 } 5053 else 5054 { 5055 return RValue<UInt2>(Nucleus::createNot(val.value)); 5056 } 5057 } 5058 5059 Type *UInt2::getType() 5060 { 5061 if(CPUID::supportsMMX2()) 5062 { 5063 return MMX::getType(); 5064 } 5065 else 5066 { 5067 return T(VectorType::get(UInt::getType(), 2)); 5068 } 5069 } 5070 5071 Int4::Int4(RValue<Byte4> cast) 5072 { 5073 Value *x = Nucleus::createBitCast(cast.value, Int::getType()); 5074 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0); 5075 5076 Value *e; 5077 5078 if (CPUID::supportsSSE4_1()) 5079 { 5080 e = x86::pmovzxbd(RValue<Int4>(a)).value; 5081 } 5082 else 5083 { 5084 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; 5085 Value *b = Nucleus::createBitCast(a, Byte16::getType()); 5086 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle); 5087 5088 int swizzle2[8] = {0, 8, 1, 
9, 2, 10, 3, 11}; 5089 Value *d = Nucleus::createBitCast(c, Short8::getType()); 5090 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2); 5091 } 5092 5093 Value *f = Nucleus::createBitCast(e, Int4::getType()); 5094 storeValue(f); 5095 } 5096 5097 Int4::Int4(RValue<SByte4> cast) 5098 { 5099 Value *x = Nucleus::createBitCast(cast.value, Int::getType()); 5100 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0); 5101 5102 Value *g; 5103 5104 if (CPUID::supportsSSE4_1()) 5105 { 5106 g = x86::pmovsxbd(RValue<Int4>(a)).value; 5107 } 5108 else 5109 { 5110 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}; 5111 Value *b = Nucleus::createBitCast(a, Byte16::getType()); 5112 Value *c = Nucleus::createShuffleVector(b, b, swizzle); 5113 5114 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 5115 Value *d = Nucleus::createBitCast(c, Short8::getType()); 5116 Value *e = Nucleus::createShuffleVector(d, d, swizzle2); 5117 5118 Value *f = Nucleus::createBitCast(e, Int4::getType()); 5119 // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24)); 5120 g = x86::psrad(RValue<Int4>(f), 24).value; 5121 } 5122 5123 storeValue(g); 5124 } 5125 5126 Int4::Int4(RValue<Float4> cast) 5127 { 5128 // xyzw.parent = this; 5129 5130 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType()); 5131 5132 storeValue(xyzw); 5133 } 5134 5135 Int4::Int4(RValue<Short4> cast) 5136 { 5137 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2))); 5138 Value *element = Nucleus::createBitCast(cast.value, Long::getType()); 5139 long2 = Nucleus::createInsertElement(long2, element, 0); 5140 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType())); 5141 5142 if(CPUID::supportsSSE4_1()) 5143 { 5144 storeValue(x86::pmovsxwd(vector).value); 5145 } 5146 else 5147 { 5148 Value *b = Nucleus::createBitCast(vector.value, Short8::getType()); 5149 5150 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3}; 
5151 Value *c = Nucleus::createShuffleVector(b, b, swizzle); 5152 Value *d = Nucleus::createBitCast(c, Int4::getType()); 5153 storeValue(d); 5154 5155 // Each Short is packed into each Int in the (Short | Short) format. 5156 // Shifting by 16 will retrieve the original Short value. 5157 // Shitfing an Int will propagate the sign bit, which will work 5158 // for both positive and negative values of a Short. 5159 *this >>= 16; 5160 } 5161 } 5162 5163 Int4::Int4(RValue<UShort4> cast) 5164 { 5165 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2))); 5166 Value *element = Nucleus::createBitCast(cast.value, Long::getType()); 5167 long2 = Nucleus::createInsertElement(long2, element, 0); 5168 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType())); 5169 5170 if(CPUID::supportsSSE4_1()) 5171 { 5172 storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value); 5173 } 5174 else 5175 { 5176 Value *b = Nucleus::createBitCast(vector.value, Short8::getType()); 5177 5178 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11}; 5179 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Short8::getType())), swizzle); 5180 Value *d = Nucleus::createBitCast(c, Int4::getType()); 5181 storeValue(d); 5182 } 5183 } 5184 5185 Int4::Int4() 5186 { 5187 // xyzw.parent = this; 5188 } 5189 5190 Int4::Int4(int xyzw) 5191 { 5192 constant(xyzw, xyzw, xyzw, xyzw); 5193 } 5194 5195 Int4::Int4(int x, int yzw) 5196 { 5197 constant(x, yzw, yzw, yzw); 5198 } 5199 5200 Int4::Int4(int x, int y, int zw) 5201 { 5202 constant(x, y, zw, zw); 5203 } 5204 5205 Int4::Int4(int x, int y, int z, int w) 5206 { 5207 constant(x, y, z, w); 5208 } 5209 5210 void Int4::constant(int x, int y, int z, int w) 5211 { 5212 // xyzw.parent = this; 5213 5214 int64_t constantVector[4] = {x, y, z, w}; 5215 storeValue(Nucleus::createConstantVector(constantVector, getType())); 5216 } 5217 5218 Int4::Int4(RValue<Int4> rhs) 5219 { 5220 // xyzw.parent = this; 5221 5222 storeValue(rhs.value); 
}

// ---------------------------------------------------------------------------
// Int4: a Reactor variable holding a vector of four Int elements
// (see Int4::getType() below).
//
// Everything in this file section runs while a routine is being built: each
// Nucleus::create* call produces a Value for the routine under construction,
// and loadValue()/storeValue() read and write the variable's storage.
// ---------------------------------------------------------------------------

// Copy-constructs from another Int4 variable: loads rhs and stores it here.
Int4::Int4(const Int4 &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from a Reference<Int4>: loads the referenced value and stores it here.
Int4::Int4(const Reference<Int4> &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from a UInt4 r-value. The value is stored as-is; no conversion
// instruction is created.
Int4::Int4(RValue<UInt4> rhs)
{
	// xyzw.parent = this;

	storeValue(rhs.value);
}

// Constructs from a UInt4 variable: loads it and stores the value unchanged.
Int4::Int4(const UInt4 &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from a Reference<UInt4>: loads it and stores the value unchanged.
Int4::Int4(const Reference<UInt4> &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Builds an Int4 out of two Int2 halves.
// Each half is bitcast to a Long, the two Longs are inserted as elements 0 (lo)
// and 1 (hi) of a 2-element Long vector, and that vector is bitcast to Int4.
Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
{
	// xyzw.parent = this;

	Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
	Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());

	// Start from an undefined vector; both elements are overwritten below.
	Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
	long2 = Nucleus::createInsertElement(long2, loLong, 0);
	long2 = Nucleus::createInsertElement(long2, hiLong, 1);
	Value *int4 = Nucleus::createBitCast(long2, Int4::getType());

	storeValue(int4);
}

// Broadcasts a scalar Int to all four elements.
Int4::Int4(RValue<Int> rhs)
{
	// xyzw.parent = this;

	// The variable has not been written yet; its current contents only serve as
	// the base vector for the insert. The shuffle below selects element 0 only.
	Value *vector = loadValue();
	Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);

	// Replicate element 0 into every position.
	int swizzle[4] = {0, 0, 0, 0};
	Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);

	storeValue(replicate);
}

// Broadcasts an Int variable: loads it and assigns through operator=, which
// converts the RValue<Int> using the broadcasting constructor above.
Int4::Int4(const Int &rhs)
{
	// xyzw.parent = this;

	*this = RValue<Int>(rhs.loadValue());
}

// Broadcasts the value behind a Reference<Int>; same path as Int4(const Int &).
Int4::Int4(const Reference<Int> &rhs)
{
	// xyzw.parent = this;

	*this = RValue<Int>(rhs.loadValue());
}

// Assignment from an r-value: stores it and returns the same r-value.
RValue<Int4> Int4::operator=(RValue<Int4> rhs) const
{
	storeValue(rhs.value);

	return rhs;
}

// Assignment from another variable: loads rhs once, stores it, and returns the
// loaded value as an r-value.
RValue<Int4> Int4::operator=(const Int4 &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<Int4>(value);
}

// Assignment from a reference: loads it once, stores it, and returns the loaded value.
RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<Int4>(value);
}

// Element-wise binary operators on Int4 r-values. Each one forwards both
// operands to the matching Nucleus::create* call; division and remainder use
// the signed variants (createSDiv / createSRem).

RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
}

RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
}

RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
}

RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
}

RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
}

RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
}

RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
}

RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
}

// Shifts by a count that is a plain C++ value, i.e. known while the routine is
// being built. Delegated to the x86::pslld / x86::psrad helpers.
RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
{
	return x86::pslld(lhs, rhs);
}

RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
{
	return x86::psrad(lhs, rhs);
}

// Shifts by a vector of counts. The right shift uses createAShr.
RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
}

RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
{
	return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
}

// Compound assignments: compute with the binary operator, then assign back
// through Int4::operator=. lhs is a const reference because operator= is a
// const member function.

RValue<Int4> operator+=(const Int4 &lhs, RValue<Int4> rhs)
{
	return lhs = lhs + rhs;
}

RValue<Int4> operator-=(const Int4 &lhs, RValue<Int4> rhs)
{
	return lhs = lhs - rhs;
}

RValue<Int4> operator*=(const Int4 &lhs, RValue<Int4> rhs)
{
	return lhs = lhs * rhs;
}

// RValue<Int4> operator/=(const Int4 &lhs, RValue<Int4> rhs)
// {
// 	return lhs = lhs / rhs;
// }

// RValue<Int4> operator%=(const Int4 &lhs, RValue<Int4> rhs)
// {
// 	return lhs = lhs % rhs;
// }

RValue<Int4> operator&=(const Int4 &lhs, RValue<Int4> rhs)
{
	return lhs = lhs & rhs;
}

RValue<Int4> operator|=(const Int4 &lhs, RValue<Int4> rhs)
{
	return lhs = lhs | rhs;
}

RValue<Int4> operator^=(const Int4 &lhs, RValue<Int4> rhs)
{
	return lhs = lhs ^ rhs;
}

RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs)
{
	return lhs = lhs << rhs;
}

RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs)
{
	return lhs = lhs >> rhs;
}

// Unary plus: returns the operand unchanged.
RValue<Int4> operator+(RValue<Int4> val)
{
	return val;
}

// Unary minus, via Nucleus::createNeg.
RValue<Int4> operator-(RValue<Int4> val)
{
	return RValue<Int4>(Nucleus::createNeg(val.value));
}

// Bitwise complement, via Nucleus::createNot.
RValue<Int4> operator~(RValue<Int4> val)
{
	return RValue<Int4>(Nucleus::createNot(val.value));
}

// Signed element-wise comparisons. Each result is the comparison sign-extended
// to Int4, intended as a per-element mask of 0 or ~0. CmpEQ, CmpLE and CmpNLT
// are built as the complementary comparison XORed with all-ones, as a
// workaround for the LLVM issue described in their FIXME comments.

RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
{
	// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
	//        Restore the following line when LLVM is updated to a version where this issue is fixed.
	// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
}

RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
{
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
}

RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
{
	// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
	//        Restore the following line when LLVM is updated to a version where this issue is fixed.
	// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
}

RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
{
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
}

// "Not less than", i.e. x >= y.
RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
{
	// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
	//        Restore the following line when LLVM is updated to a version where this issue is fixed.
	// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
}

// "Not less than or equal", i.e. x > y.
RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
{
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
}

// Element-wise signed maximum. Uses x86::pmaxsd when CPUID reports SSE4.1;
// otherwise selects between x and y with the x > y mask.
RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::pmaxsd(x, y);
	}
	else
	{
		RValue<Int4> greater = CmpNLE(x, y);
		// '&' binds tighter than '|': (x & greater) | (y & ~greater)
		return x & greater | y & ~greater;
	}
}

// Element-wise signed minimum. Uses x86::pminsd when CPUID reports SSE4.1;
// otherwise selects between x and y with the x < y mask.
RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::pminsd(x, y);
	}
	else
	{
		RValue<Int4> less = CmpLT(x, y);
		// '&' binds tighter than '|': (x & less) | (y & ~less)
		return x & less | y & ~less;
	}
}

// Converts Float4 to Int4 through x86::cvtps2dq.
// NOTE(review): the rounding behavior is whatever that helper implements; it is
// not visible from this function.
RValue<Int4> RoundInt(RValue<Float4> cast)
{
	return x86::cvtps2dq(cast);
}

// Packs two Int4 values into one Short8 through x86::packssdw.
RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
{
	return x86::packssdw(x, y);
}

// Returns element i of x as a scalar Int.
RValue<Int> Extract(RValue<Int4> x, int i)
{
	return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
}

// Returns a copy of x with element i replaced by 'element'.
RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
{
	return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
}

// Reinterprets x as Float4 and delegates to x86::movmskps.
RValue<Int> SignMask(RValue<Int4> x)
{
	return x86::movmskps(As<Float4>(x));
}

// Rearranges the elements of x according to 'select'; the encoding of 'select'
// is defined by createSwizzle4, which is outside this section.
RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
{
	return RValue<Int4>(createSwizzle4(x.value, select));
}

// Reactor type of Int4: a vector of four Int::getType() elements.
Type *Int4::getType()
{
	return T(VectorType::get(Int::getType(), 4));
}

// ---------------------------------------------------------------------------
// UInt4: a Reactor variable holding a vector of four UInt elements
// (see UInt4::getType() below).
// ---------------------------------------------------------------------------

// Converts Float4 to UInt4 using only signed float-to-int conversions.
// Elements at or above 2^31 are converted after subtracting 2^31 and get 2^31
// added back as an integer; negative inputs (sign bit set) produce 0.
UInt4::UInt4(RValue<Float4> cast)
{
	// xyzw.parent = this;

	// Note: createFPToUI is broken, must perform conversion using createFPtoSI
	// Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());

	// Smallest positive value representable in UInt, but not in Int
	const unsigned int ustart = 0x80000000u;
	const float ustartf = float(ustart);

	// Check if the value can be represented as an Int
	Int4 uiValue = CmpNLT(cast, Float4(ustartf));
	// If the value is too large, subtract ustart and re-add it after conversion.
	uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
	// Otherwise, just convert normally
	          (~uiValue & Int4(cast));
	// If the value is negative, store 0, otherwise store the result of the conversion
	storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
}

// Default constructor: leaves the variable without a stored value.
UInt4::UInt4()
{
	// xyzw.parent = this;
}

// Constant constructors. When fewer than four arguments are given, the last
// argument is repeated for the remaining elements.

UInt4::UInt4(int xyzw)
{
	constant(xyzw, xyzw, xyzw, xyzw);
}

UInt4::UInt4(int x, int yzw)
{
	constant(x, yzw, yzw, yzw);
}

UInt4::UInt4(int x, int y, int zw)
{
	constant(x, y, zw, zw);
}

UInt4::UInt4(int x, int y, int z, int w)
{
	constant(x, y, z, w);
}

// Stores the constant vector {x, y, z, w}. The values are widened to int64_t
// for Nucleus::createConstantVector.
void UInt4::constant(int x, int y, int z, int w)
{
	// xyzw.parent = this;

	int64_t constantVector[4] = {x, y, z, w};
	storeValue(Nucleus::createConstantVector(constantVector, getType()));
}

// Constructs from a UInt4 r-value.
UInt4::UInt4(RValue<UInt4> rhs)
{
	// xyzw.parent = this;

	storeValue(rhs.value);
}

// Copy-constructs from another UInt4 variable.
UInt4::UInt4(const UInt4 &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from a Reference<UInt4>.
UInt4::UInt4(const Reference<UInt4> &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from an Int4 r-value. The value is stored as-is; no conversion
// instruction is created.
UInt4::UInt4(RValue<Int4> rhs)
{
	// xyzw.parent = this;

	storeValue(rhs.value);
}

// Constructs from an Int4 variable: loads it and stores the value unchanged.
UInt4::UInt4(const Int4 &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from a Reference<Int4>: loads it and stores the value unchanged.
UInt4::UInt4(const Reference<Int4> &rhs)
{
	// xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Builds a UInt4 out of two UInt2 halves, using the same steps as
// Int4(RValue<Int2>, RValue<Int2>): bitcast each half to Long, insert them as
// elements 0 (lo) and 1 (hi) of a 2-element Long vector, and bitcast the result.
// NOTE(review): the final bitcast targets Int4::getType() rather than
// UInt4::getType() - presumably both describe the same vector type; confirm.
UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
{
	Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
	Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());

	Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
	long2 = Nucleus::createInsertElement(long2, loLong, 0);
	long2 = Nucleus::createInsertElement(long2, hiLong, 1);
	Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());

	storeValue(uint4);
}

// Assignment from an r-value: stores it and returns the same r-value.
RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) const
{
	storeValue(rhs.value);

	return rhs;
}

// Assignment from another variable: loads rhs once, stores it, and returns the loaded value.
RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<UInt4>(value);
}

// Assignment from a reference: loads it once, stores it, and returns the loaded value.
RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<UInt4>(value);
}

// Element-wise binary operators on UInt4 r-values. Division and remainder use
// the unsigned variants (createUDiv / createURem).

RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
}

RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
}

RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
}

RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
}

RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
}

RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
}

RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
}

RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
}

// Shifts by a count known while the routine is being built. The left shift
// reuses the Int4 helper (x86::pslld) by reinterpreting the operand and the
// result; the right shift uses x86::psrld.
RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
{
	return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
}

RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
{
	return x86::psrld(lhs, rhs);
}

// Shifts by a vector of counts. The right shift uses createLShr.
RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
}

RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
{
	return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
}

// Compound assignments: compute with the binary operator, then assign back
// through UInt4::operator= (a const member, hence the const reference).

RValue<UInt4> operator+=(const UInt4 &lhs, RValue<UInt4> rhs)
{
	return lhs = lhs + rhs;
}

RValue<UInt4> operator-=(const UInt4 &lhs, RValue<UInt4> rhs)
{
	return lhs = lhs - rhs;
}

RValue<UInt4> operator*=(const UInt4 &lhs, RValue<UInt4> rhs)
{
	return lhs = lhs * rhs;
}

// RValue<UInt4> operator/=(const UInt4 &lhs, RValue<UInt4> rhs)
// {
// 	return lhs = lhs / rhs;
// }

// RValue<UInt4> operator%=(const UInt4 &lhs, RValue<UInt4> rhs)
// {
// 	return lhs = lhs % rhs;
// }

RValue<UInt4> operator&=(const UInt4 &lhs, RValue<UInt4> rhs)
{
	return lhs = lhs & rhs;
}

RValue<UInt4> operator|=(const UInt4 &lhs, RValue<UInt4> rhs)
{
	return lhs = lhs | rhs;
}

RValue<UInt4> operator^=(const UInt4 &lhs, RValue<UInt4> rhs)
{
	return lhs = lhs ^ rhs;
}

RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs)
{
	return lhs = lhs << rhs;
}

RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs)
{
	return lhs = lhs >> rhs;
}

// Unary plus: returns the operand unchanged.
RValue<UInt4> operator+(RValue<UInt4> val)
{
	return val;
}

// Unary minus, via Nucleus::createNeg.
RValue<UInt4> operator-(RValue<UInt4> val)
{
	return RValue<UInt4>(Nucleus::createNeg(val.value));
}

// Bitwise complement, via Nucleus::createNot.
RValue<UInt4> operator~(RValue<UInt4> val)
{
	return RValue<UInt4>(Nucleus::createNot(val.value));
}

// Unsigned element-wise comparisons, structured like the Int4 versions but
// using the unsigned ICmp predicates. The sign extension targets
// Int4::getType() and the result is wrapped as RValue<UInt4>.

RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
{
	// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
	//        Restore the following line when LLVM is updated to a version where this issue is fixed.
	// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
	return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
}

RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
{
	return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
}

RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
{
	// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
	//        Restore the following line when LLVM is updated to a version where this issue is fixed.
	// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
	return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
}

RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
{
	return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
}

// "Not less than", i.e. x >= y.
RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
{
	// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
	//        Restore the following line when LLVM is updated to a version where this issue is fixed.
	// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
	return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
}

// "Not less than or equal", i.e. x > y.
RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
{
	return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
}

// Element-wise unsigned maximum. Uses x86::pmaxud when CPUID reports SSE4.1;
// otherwise selects between x and y with the x > y mask.
RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::pmaxud(x, y);
	}
	else
	{
		RValue<UInt4> greater = CmpNLE(x, y);
		// '&' binds tighter than '|': (x & greater) | (y & ~greater)
		return x & greater | y & ~greater;
	}
}

// Element-wise unsigned minimum. Uses x86::pminud when CPUID reports SSE4.1;
// otherwise selects between x and y with the x < y mask.
RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::pminud(x, y);
	}
	else
	{
		RValue<UInt4> less = CmpLT(x, y);
		// '&' binds tighter than '|': (x & less) | (y & ~less)
		return x & less | y & ~less;
	}
}

// Packs two UInt4 values into one UShort8 through x86::packusdw. Unlike Max/Min
// above there is no CPUID check or alternative path here (see FIXME).
RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
{
	return x86::packusdw(x, y);   // FIXME: Fallback required
}

// Reactor type of UInt4: a vector of four UInt::getType() elements.
Type *UInt4::getType()
{
	return T(VectorType::get(UInt::getType(), 4));
}

// ---------------------------------------------------------------------------
// Float: a Reactor variable holding a single float (see Float::getType()).
// ---------------------------------------------------------------------------

// Converts a signed Int to Float (createSIToFP). Despite its name, the local
// 'integer' holds the converted floating-point value.
Float::Float(RValue<Int> cast)
{
	Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());

	storeValue(integer);
}

// Default constructor: leaves the variable without a stored value.
Float::Float()
{

}

// Constructs from a C++ float, stored as a constant.
Float::Float(float x)
{
	storeValue(Nucleus::createConstantFloat(x));
}

// Constructs from a Float r-value.
Float::Float(RValue<Float> rhs)
{
	storeValue(rhs.value);
}

// Copy-constructs from another Float variable.
Float::Float(const Float &rhs)
{
	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from a Reference<Float>.
Float::Float(const Reference<Float> &rhs)
{
	Value *value = rhs.loadValue();
	storeValue(value);
}

// Assignment from an r-value: stores it and returns the same r-value.
RValue<Float> Float::operator=(RValue<Float> rhs) const
{
	storeValue(rhs.value);

	return rhs;
}

// Assignment from another variable: loads rhs once, stores it, and returns the loaded value.
RValue<Float> Float::operator=(const Float &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<Float>(value);
}

// Assignment from a reference: loads it once, stores it, and returns the loaded value.
RValue<Float> Float::operator=(const Reference<Float> &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<Float>(value);
}

// Scalar floating-point arithmetic, forwarded to Nucleus::createF*.

RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
}

RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
}

RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
}

RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
}

// Compound assignments: compute with the binary operator, then assign back
// through Float::operator= (a const member, hence the const reference).

RValue<Float> operator+=(const Float &lhs, RValue<Float> rhs)
{
	return lhs = lhs + rhs;
}

RValue<Float> operator-=(const Float &lhs, RValue<Float> rhs)
{
	return lhs = lhs - rhs;
}

RValue<Float> operator*=(const Float &lhs, RValue<Float> rhs)
{
	return lhs = lhs * rhs;
}

RValue<Float> operator/=(const Float &lhs, RValue<Float> rhs)
{
	return lhs = lhs / rhs;
}

// Unary plus: returns the operand unchanged.
RValue<Float> operator+(RValue<Float> val)
{
	return val;
}

// Unary minus, via Nucleus::createFNeg.
RValue<Float> operator-(RValue<Float> val)
{
	return RValue<Float>(Nucleus::createFNeg(val.value));
}

// Scalar comparisons returning Bool. All six use the createFCmpO* family.
// NOTE(review): presumably these map to LLVM's ordered fcmp predicates, which
// are false whenever an operand is NaN - including operator!= (ONE); confirm
// against Nucleus.

RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
}

RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
}

RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
}

RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
}

RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
}

RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
{
	return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
}

// Absolute value: x when x > 0, otherwise -x.
RValue<Float> Abs(RValue<Float> x)
{
	return IfThenElse(x > 0.0f, x, -x);
}

// Maximum: x when x > y, otherwise y.
RValue<Float> Max(RValue<Float> x, RValue<Float> y)
{
	return IfThenElse(x > y, x, y);
}

// Minimum: x when x < y, otherwise y.
RValue<Float> Min(RValue<Float> x, RValue<Float> y)
{
	return IfThenElse(x < y, x, y);
}

// Approximate reciprocal through x86::rcpss.
// With exactAtPow2 set, the result is scaled by 1 / rcp(1). That factor is
// computed here on the host with SSE intrinsics while the routine is being
// built, and enters the routine as a Float constant.
RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
{
	if(exactAtPow2)
	{
		// rcpss uses a piecewise-linear approximation which minimizes the relative error
		// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
		return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
	}
	else
	{
		return x86::rcpss(x);
	}
}

// Approximate reciprocal square root through x86::rsqrtss.
RValue<Float> RcpSqrt_pp(RValue<Float> x)
{
	return x86::rsqrtss(x);
}

// Square root through x86::sqrtss.
RValue<Float> Sqrt(RValue<Float> x)
{
	return x86::sqrtss(x);
}

// Scalar rounding helpers. With SSE4.1 each uses the scalar x86 helper
// directly. Without it, Round/Frac/Floor/Ceil broadcast x into a Float4, call
// the Float4 overload and take its x component; Trunc converts through Int.

RValue<Float> Round(RValue<Float> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::roundss(x, 0);
	}
	else
	{
		return Float4(Round(Float4(x))).x;
	}
}

RValue<Float> Trunc(RValue<Float> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::roundss(x, 3);
	}
	else
	{
		return Float(Int(x));   // Rounded toward zero
	}
}

// Fractional part, computed as x - floor(x) on the SSE4.1 path.
RValue<Float> Frac(RValue<Float> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x - x86::floorss(x);
	}
	else
	{
		return Float4(Frac(Float4(x))).x;
	}
}

RValue<Float> Floor(RValue<Float> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::floorss(x);
	}
	else
	{
		return Float4(Floor(Float4(x))).x;
	}
}

RValue<Float> Ceil(RValue<Float> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::ceilss(x);
	}
	else
	{
		return Float4(Ceil(Float4(x))).x;
	}
}

// Reactor type of Float: LLVM's float type in the file-level ::context.
Type *Float::getType()
{
	return T(llvm::Type::getFloatTy(*::context));
}

// ---------------------------------------------------------------------------
// Float2
// ---------------------------------------------------------------------------

// Takes one 64-bit half of a Float4: the value is viewed as a 2-element Long
// vector, element 0 is extracted, and that Long is bitcast to Float2.
Float2::Float2(RValue<Float4> cast)
{
	// xyzw.parent = this;

	Value *int64x2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
	Value *int64 = Nucleus::createExtractElement(int64x2, Long::getType(), 0);
	Value *float2 = Nucleus::createBitCast(int64, Float2::getType());

	storeValue(float2);
}

// Reactor type of Float2: a vector of two Float::getType() elements.
Type *Float2::getType()
{
	return T(VectorType::get(Float::getType(), 2));
}

// ---------------------------------------------------------------------------
// Float4: a Reactor variable holding a vector of four Float elements
// (see Float4::getType() below).
//
// Every constructor first points the xyzw member back at this object
// (xyzw.parent = this), either directly or through constant().
// ---------------------------------------------------------------------------

// Converts Byte4 to Float4. Only the #else branch is compiled: the input is
// converted to Int4 first and then to float with createSIToFP. The two disabled
// branches are earlier attempts, kept for reference.
Float4::Float4(RValue<Byte4> cast)
{
	xyzw.parent = this;

	// From here on the local 'Value *xyzw' declared in each branch shadows the
	// xyzw member.
	#if 0
		Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());   // FIXME: Crashes
	#elif 0
		Value *vector = loadValue();

		Value *i8x = Nucleus::createExtractElement(cast.value, 0);
		Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
		Value *x = Nucleus::createInsertElement(vector, f32x, 0);

		Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
		Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
		Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));

		Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
		Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
		Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));

		Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
		Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
		Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
	#else
		Value *a = Int4(cast).loadValue();
		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
	#endif

	storeValue(xyzw);
}

// Converts SByte4 to Float4. Only the #else branch is compiled: the input is
// converted to Int4 first and then to float with createSIToFP.
Float4::Float4(RValue<SByte4> cast)
{
	xyzw.parent = this;

	// From here on the local 'Value *xyzw' declared in each branch shadows the
	// xyzw member.
	#if 0
		Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());   // FIXME: Crashes
	#elif 0
		Value *vector = loadValue();

		Value *i8x = Nucleus::createExtractElement(cast.value, 0);
		Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
		Value *x = Nucleus::createInsertElement(vector, f32x, 0);

		Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
		Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
		Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));

		Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
		Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
		Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));

		Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
		Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
		Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
	#else
		Value *a = Int4(cast).loadValue();
		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
	#endif

	storeValue(xyzw);
}

// Converts Short4 to Float4 by going through an Int4 variable and createSIToFP.
Float4::Float4(RValue<Short4> cast)
{
	xyzw.parent = this;

	Int4 c(cast);
	storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
}

// Converts UShort4 to Float4 by going through an Int4 variable and createSIToFP.
Float4::Float4(RValue<UShort4> cast)
{
	xyzw.parent = this;

	Int4 c(cast);
	storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
}

// Converts Int4 to Float4 with createSIToFP.
Float4::Float4(RValue<Int4> cast)
{
	xyzw.parent = this;

	// This local shadows the xyzw member from here on.
	Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());

	storeValue(xyzw);
}

// Converts UInt4 to Float4 with createUIToFP.
Float4::Float4(RValue<UInt4> cast)
{
	xyzw.parent = this;

	// This local shadows the xyzw member from here on.
	Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());

	storeValue(xyzw);
}

// Default constructor: sets up the xyzw member only; no value is stored.
Float4::Float4()
{
	xyzw.parent = this;
}

// Constant constructors. When fewer than four arguments are given, the last
// argument is repeated for the remaining elements.

Float4::Float4(float xyzw)
{
	constant(xyzw, xyzw, xyzw, xyzw);
}

Float4::Float4(float x, float yzw)
{
	constant(x, yzw, yzw, yzw);
}

Float4::Float4(float x, float y, float zw)
{
	constant(x, y, zw, zw);
}

Float4::Float4(float x, float y, float z, float w)
{
	constant(x, y, z, w);
}

// Stores the constant vector {x, y, z, w}. The values are widened to double
// for Nucleus::createConstantVector.
void Float4::constant(float x, float y, float z, float w)
{
	xyzw.parent = this;

	double constantVector[4] = {x, y, z, w};
	storeValue(Nucleus::createConstantVector(constantVector, getType()));
}

// Constructs from a Float4 r-value.
Float4::Float4(RValue<Float4> rhs)
{
	xyzw.parent = this;

	storeValue(rhs.value);
}

// Copy-constructs from another Float4 variable.
Float4::Float4(const Float4 &rhs)
{
	xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Constructs from a Reference<Float4>.
Float4::Float4(const Reference<Float4> &rhs)
{
	xyzw.parent = this;

	Value *value = rhs.loadValue();
	storeValue(value);
}

// Broadcasts a scalar Float to all four elements.
Float4::Float4(RValue<Float> rhs)
{
	xyzw.parent = this;

	// The variable has not been written yet; its current contents only serve as
	// the base vector for the insert. The shuffle below selects element 0 only.
	Value *vector = loadValue();
	Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);

	// Replicate element 0 into every position.
	int swizzle[4] = {0, 0, 0, 0};
	Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);

	storeValue(replicate);
}

// Broadcasts a Float variable: loads it and assigns through
// operator=(RValue<Float>), which uses the broadcasting constructor above.
Float4::Float4(const Float &rhs)
{
	xyzw.parent = this;

	*this = RValue<Float>(rhs.loadValue());
}

// Broadcasts the value behind a Reference<Float>; same path as Float4(const Float &).
Float4::Float4(const Reference<Float> &rhs)
{
	xyzw.parent = this;

	*this = RValue<Float>(rhs.loadValue());
}

// Assigns the constant x to all four elements.
RValue<Float4> Float4::operator=(float x) const
{
	return *this = Float4(x, x, x, x);
}

// Assignment from an r-value: stores it and returns the same r-value.
RValue<Float4> Float4::operator=(RValue<Float4> rhs) const
{
	storeValue(rhs.value);

	return rhs;
}

// Assignment from another variable: loads rhs once, stores it, and returns the loaded value.
RValue<Float4> Float4::operator=(const Float4 &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<Float4>(value);
}

// Assignment from a reference: loads it once, stores it, and returns the loaded value.
RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) const
{
	Value *value = rhs.loadValue();
	storeValue(value);

	return RValue<Float4>(value);
}

// Scalar assignments: build a temporary Float4 from the scalar (broadcast) and
// assign that.

RValue<Float4> Float4::operator=(RValue<Float> rhs) const
{
	return *this = Float4(rhs);
}

RValue<Float4> Float4::operator=(const Float &rhs) const
{
	return *this = Float4(rhs);
}

RValue<Float4> Float4::operator=(const Reference<Float> &rhs) const
{
	return *this = Float4(rhs);
}

// Element-wise floating-point arithmetic, forwarded to Nucleus::createF*.

RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
{
	return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
}

RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
{
	return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
}

RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
{
	return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
}

RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
{
	return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
}

RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
{
	return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
}

// Compound assignments: compute with the binary operator, then assign back
// through Float4::operator= (a const member, hence the const reference).

RValue<Float4> operator+=(const Float4 &lhs, RValue<Float4> rhs)
{
	return lhs = lhs + rhs;
}

RValue<Float4> operator-=(const Float4 &lhs, RValue<Float4> rhs)
{
	return lhs = lhs - rhs;
}

RValue<Float4> operator*=(const Float4 &lhs, RValue<Float4> rhs)
{
	return lhs = lhs * rhs;
}

RValue<Float4> operator/=(const Float4 &lhs, RValue<Float4> rhs)
{
	return lhs = lhs / rhs;
}

RValue<Float4> operator%=(const Float4 &lhs, RValue<Float4> rhs)
{
	return lhs = lhs % rhs;
}

// Unary plus: returns the operand unchanged.
RValue<Float4> operator+(RValue<Float4> val)
{
	return val;
}

// Unary minus, via Nucleus::createFNeg.
RValue<Float4> operator-(RValue<Float4> val)
{
	return RValue<Float4>(Nucleus::createFNeg(val.value));
}

// Element-wise absolute value: the bits are viewed as Int4, ANDed with
// 0x7FFFFFFF in every element (clearing the top bit), and viewed as Float4 again.
RValue<Float4> Abs(RValue<Float4> x)
{
	Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
	int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
	Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));

	return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
}

// Element-wise maximum through x86::maxps.
RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
{
	return x86::maxps(x, y);
}

// Element-wise minimum through x86::minps.
RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
{
	return x86::minps(x, y);
}

// Approximate reciprocal through x86::rcpps.
// With exactAtPow2 set, the result is scaled by 1 / rcp(1). That factor is
// computed here on the host with SSE intrinsics while the routine is being
// built, and enters the routine as a Float4 constant.
RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
{
	if(exactAtPow2)
	{
		// rcpps uses a piecewise-linear approximation which minimizes the relative error
		// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
		return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
	}
	else
	{
		return x86::rcpps(x);
	}
}

// Approximate reciprocal square root through x86::rsqrtps.
RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
{
	return x86::rsqrtps(x);
}

// Square root through x86::sqrtps.
RValue<Float4> Sqrt(RValue<Float4> x)
{
	return x86::sqrtps(x);
}

// Returns a copy of val with element i replaced by 'element'.
RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i)
{
	return RValue<Float4>(Nucleus::createInsertElement(val.value, element.value, i));
}

// Returns element i of x as a scalar Float.
RValue<Float> Extract(RValue<Float4> x, int i)
{
	return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
}

// Rearranges the elements of x according to 'select'; the encoding of 'select'
// is defined by createSwizzle4, which is outside this section.
RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
{
	return RValue<Float4>(createSwizzle4(x.value, select));
}

// Builds a vector from two operands under control of 'imm', which holds four
// 2-bit fields (bits 1:0, 3:2, 5:4, 7:6), one per result element. The first two
// result elements use indices 0..3 and the last two use indices 4..7.
// NOTE(review): assumes Nucleus::createShuffleVector follows the LLVM
// shufflevector convention (0..3 index x, 4..7 index y) - confirm.
RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
{
	int shuffle[4] =
	{
		((imm >> 0) & 0x03) + 0,
		((imm >> 2) & 0x03) + 0,
		((imm >> 4) & 0x03) + 4,
		((imm >> 6) & 0x03) + 4,
	};

	return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}

// Shuffles x and y with indices {0, 4, 1, 5}.
// NOTE(review): under the LLVM shufflevector convention this is {x0, y0, x1, y1} - confirm.
RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
{
	int shuffle[4] = {0, 4, 1, 5};
	return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}

// Shuffles x and y with indices {2, 6, 3, 7}.
// NOTE(review): under the LLVM shufflevector convention this is {x2, y2, x3, y3} - confirm.
RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
{
	int shuffle[4] = {2, 6, 3, 7};
	return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
}

// Read-modify-write of lhs: loads lhs, combines it with rhs via createMask4
// (the meaning of 'select' is defined there, outside this section), stores the
// result back into lhs, and also returns it.
RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
{
	Value *vector = lhs.loadValue();
	Value *shuffle = createMask4(vector, rhs.value, select);
	lhs.storeValue(shuffle);

	return RValue<Float4>(shuffle);
}

// Delegates to x86::movmskps.
RValue<Int> SignMask(RValue<Float4> x)
{
	return x86::movmskps(x);
}

// Element-wise floating-point comparisons producing an Int4 mask: the
// createFCmpO* result is sign-extended to Int4. The commented-out line in each
// function is the alternative that called the x86 compare helper directly.
// NOTE(review): CmpNEQ/CmpNLT/CmpNLE use ONE/OGE/OGT. If those are LLVM's
// ordered predicates they are false for NaN operands, which may differ from
// the x86 helpers named in the comments - confirm the intended NaN behavior.

RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
{
	// return As<Int4>(x86::cmpeqps(x, y));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
}

RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
{
	// return As<Int4>(x86::cmpltps(x, y));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
}

RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
{
	// return As<Int4>(x86::cmpleps(x, y));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
}

RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
{
	// return As<Int4>(x86::cmpneqps(x, y));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
}

RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
{
	// return As<Int4>(x86::cmpnltps(x, y));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
}

RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
{
	// return As<Int4>(x86::cmpnleps(x, y));
	return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
}

// Vector rounding helpers. With SSE4.1 each uses the x86 helper directly;
// otherwise they are built from integer conversions and from each other.

// Fallback: convert to Int4 with RoundInt and back to Float4.
RValue<Float4> Round(RValue<Float4> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::roundps(x, 0);
	}
	else
	{
		return Float4(RoundInt(x));
	}
}

// Fallback: convert to Int4 and back to Float4.
RValue<Float4> Trunc(RValue<Float4> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::roundps(x, 3);
	}
	else
	{
		return Float4(Int4(x));   // Rounded toward zero
	}
}

// Fractional part, x - floor(x).
// Fallback: subtract the Int4-converted value, then add 1.0 to every element
// whose intermediate result is below zero. The 0 > frc mask is ANDed with the
// bit pattern of 1.0f and the result is added as a float.
RValue<Float4> Frac(RValue<Float4> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x - x86::floorps(x);
	}
	else
	{
		Float4 frc = x - Float4(Int4(x));   // Signed fractional part

		return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
	}
}

// Fallback: x - Frac(x).
RValue<Float4> Floor(RValue<Float4> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::floorps(x);
	}
	else
	{
		return x - Frac(x);
	}
}

// Fallback: -Floor(-x).
RValue<Float4> Ceil(RValue<Float4> x)
{
	if(CPUID::supportsSSE4_1())
	{
		return x86::ceilps(x);
	}
	else
	{
		return -Floor(-x);
	}
}

// Reactor type of Float4: a vector of four Float::getType() elements.
Type *Float4::getType()
{
	return T(VectorType::get(Float::getType(), 4));
}

// ---------------------------------------------------------------------------
// Pointer<Byte> arithmetic. Each overload creates a GEP with Byte as the
// element type, so 'offset' counts Byte elements.
// ---------------------------------------------------------------------------

// Offset known while the routine is being built; it becomes a constant.
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
{
	return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset))));
}

// Offset given as an Int r-value.
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
{
	return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
}

// Offset given as a UInt r-value.
RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
{
	return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
}

RValue<Pointer<Byte>> operator+=(const
Pointer<Byte> &lhs, int offset) 6647 { 6648 return lhs = lhs + offset; 6649 } 6650 6651 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<Int> offset) 6652 { 6653 return lhs = lhs + offset; 6654 } 6655 6656 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<UInt> offset) 6657 { 6658 return lhs = lhs + offset; 6659 } 6660 6661 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset) 6662 { 6663 return lhs + -offset; 6664 } 6665 6666 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset) 6667 { 6668 return lhs + -offset; 6669 } 6670 6671 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset) 6672 { 6673 return lhs + -offset; 6674 } 6675 6676 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset) 6677 { 6678 return lhs = lhs - offset; 6679 } 6680 6681 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<Int> offset) 6682 { 6683 return lhs = lhs - offset; 6684 } 6685 6686 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<UInt> offset) 6687 { 6688 return lhs = lhs - offset; 6689 } 6690 6691 void Return() 6692 { 6693 Nucleus::createRetVoid(); 6694 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6695 Nucleus::createUnreachable(); 6696 } 6697 6698 void Return(bool ret) 6699 { 6700 Nucleus::createRet(V(Nucleus::createConstantBool(ret))); 6701 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6702 Nucleus::createUnreachable(); 6703 } 6704 6705 void Return(const Int &ret) 6706 { 6707 Nucleus::createRet(ret.loadValue()); 6708 Nucleus::setInsertBlock(Nucleus::createBasicBlock()); 6709 Nucleus::createUnreachable(); 6710 } 6711 6712 bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB) 6713 { 6714 Nucleus::createCondBr(cmp.value, bodyBB, endBB); 6715 Nucleus::setInsertBlock(bodyBB); 6716 6717 return true; 6718 } 6719 6720 void endIf(BasicBlock *falseBB) 6721 { 6722 ::falseBB = falseBB; 6723 } 6724 6725 bool 
elseBlock(BasicBlock *falseBB) 6726 { 6727 assert(falseBB && "Else not preceded by If"); 6728 falseBB->back().eraseFromParent(); 6729 Nucleus::setInsertBlock(falseBB); 6730 6731 return true; 6732 } 6733 6734 BasicBlock *beginElse() 6735 { 6736 BasicBlock *falseBB = ::falseBB; 6737 ::falseBB = nullptr; 6738 6739 return falseBB; 6740 } 6741 6742 RValue<Long> Ticks() 6743 { 6744 llvm::Function *rdtsc = Intrinsic::getDeclaration(::module, Intrinsic::readcyclecounter); 6745 6746 return RValue<Long>(V(::builder->CreateCall(rdtsc))); 6747 } 6748} 6749 6750namespace sw 6751{ 6752 namespace x86 6753 { 6754 RValue<Int> cvtss2si(RValue<Float> val) 6755 { 6756 llvm::Function *cvtss2si = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtss2si); 6757 6758 Float4 vector; 6759 vector.x = val; 6760 6761 return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value))); 6762 } 6763 6764 RValue<Int2> cvtps2pi(RValue<Float4> val) 6765 { 6766 llvm::Function *cvtps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtps2pi); 6767 6768 return RValue<Int2>(V(::builder->CreateCall(cvtps2pi, val.value))); 6769 } 6770 6771 RValue<Int2> cvttps2pi(RValue<Float4> val) 6772 { 6773 llvm::Function *cvttps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvttps2pi); 6774 6775 return RValue<Int2>(V(::builder->CreateCall(cvttps2pi, val.value))); 6776 } 6777 6778 RValue<Int4> cvtps2dq(RValue<Float4> val) 6779 { 6780 if(CPUID::supportsSSE2()) 6781 { 6782 llvm::Function *cvtps2dq = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_cvtps2dq); 6783 6784 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value))); 6785 } 6786 else 6787 { 6788 Int2 lo = x86::cvtps2pi(val); 6789 Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE)); 6790 6791 return Int4(lo, hi); 6792 } 6793 } 6794 6795 RValue<Float> rcpss(RValue<Float> val) 6796 { 6797 llvm::Function *rcpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ss); 6798 6799 Value *vector = 
Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0); 6800 6801 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0)); 6802 } 6803 6804 RValue<Float> sqrtss(RValue<Float> val) 6805 { 6806 llvm::Function *sqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ss); 6807 6808 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0); 6809 6810 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0)); 6811 } 6812 6813 RValue<Float> rsqrtss(RValue<Float> val) 6814 { 6815 llvm::Function *rsqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ss); 6816 6817 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0); 6818 6819 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0)); 6820 } 6821 6822 RValue<Float4> rcpps(RValue<Float4> val) 6823 { 6824 llvm::Function *rcpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ps); 6825 6826 return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value))); 6827 } 6828 6829 RValue<Float4> sqrtps(RValue<Float4> val) 6830 { 6831 llvm::Function *sqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ps); 6832 6833 return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value))); 6834 } 6835 6836 RValue<Float4> rsqrtps(RValue<Float4> val) 6837 { 6838 llvm::Function *rsqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ps); 6839 6840 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value))); 6841 } 6842 6843 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y) 6844 { 6845 llvm::Function *maxps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_max_ps); 6846 6847 return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value))); 6848 } 6849 6850 
// minps via Intrinsic::x86_sse_min_ps.
RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_min_ps);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

	return RValue<Float4>(call);
}

// roundss via Intrinsic::x86_sse41_round_ss with rounding-control immediate imm.
// The scalar is inserted into element 0 of an undefined vector, and element 0
// of the intrinsic's result is returned.
RValue<Float> roundss(RValue<Float> val, unsigned char imm)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ss);

	Value *undef = V(UndefValue::get(Float4::getType()));
	Value *operand = Nucleus::createInsertElement(undef, val.value, 0);
	Value *mode = V(Nucleus::createConstantInt(imm));
	Value *call = V(::builder->CreateCall3(intrinsic, undef, operand, mode));

	return RValue<Float>(Nucleus::createExtractElement(call, Float::getType(), 0));
}

// Scalar floor: roundss with immediate 1.
RValue<Float> floorss(RValue<Float> val)
{
	return roundss(val, 1);
}

// Scalar ceiling: roundss with immediate 2.
RValue<Float> ceilss(RValue<Float> val)
{
	return roundss(val, 2);
}

// roundps via Intrinsic::x86_sse41_round_ps with rounding-control immediate imm.
RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ps);

	Value *mode = V(Nucleus::createConstantInt(imm));
	Value *call = V(::builder->CreateCall2(intrinsic, val.value, mode));

	return RValue<Float4>(call);
}

// Packed floor: roundps with immediate 1.
RValue<Float4> floorps(RValue<Float4> val)
{
	return roundps(val, 1);
}

// Packed ceiling: roundps with immediate 2.
RValue<Float4> ceilps(RValue<Float4> val)
{
	return roundps(val, 2);
}

// cmpps via Intrinsic::x86_sse_cmp_ps; imm is the comparison predicate byte.
RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ps);

	Value *predicate = V(Nucleus::createConstantByte(imm));
	Value *call = V(::builder->CreateCall3(intrinsic, x.value, y.value, predicate));

	return RValue<Float4>(call);
}

// cmpps with predicate 0.
RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 0);
}

// cmpps with predicate 1.
RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 1);
}

// cmpps with predicate 2.
RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 2);
}

// cmpps with predicate 3.
RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 3);
}

// cmpps with predicate 4.
RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 4);
}

// cmpps with predicate 5.
RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 5);
}

// cmpps with predicate 6.
RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 6);
}

// cmpps with predicate 7.
RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
{
	return cmpps(x, y, 7);
}

// cmpss via Intrinsic::x86_sse_cmp_ss; imm is the comparison predicate byte.
// Both scalars are inserted into element 0 of undefined vectors, and element 0
// of the intrinsic's result is returned.
RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ss);

	Value *left = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), x.value, 0);
	Value *right = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), y.value, 0);

	Value *predicate = V(Nucleus::createConstantByte(imm));
	Value *call = V(::builder->CreateCall3(intrinsic, left, right, predicate));

	return RValue<Float>(Nucleus::createExtractElement(call, Float::getType(), 0));
}

// cmpss with predicate 0.
RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 0);
}

// cmpss with predicate 1.
RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 1);
}

// cmpss with predicate 2.
RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 2);
}

// cmpss with predicate 3.
RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 3);
}

// cmpss with predicate 4.
RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 4);
}

// cmpss with predicate 5.
RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 5);
}

// cmpss with predicate 6.
RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 6);
}

// cmpss with predicate 7.
RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
{
	return cmpss(x, y, 7);
}

// pabsd via Intrinsic::x86_ssse3_pabs_d_128.
RValue<Int4> pabsd(RValue<Int4> x)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_ssse3_pabs_d_128);
	Value *call = V(::builder->CreateCall(intrinsic, x.value));

	return RValue<Int4>(call);
}

// paddsw via Intrinsic::x86_mmx_padds_w (operands reinterpreted as MMX).
RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// psubsw via Intrinsic::x86_mmx_psubs_w (operands reinterpreted as MMX).
RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// paddusw via Intrinsic::x86_mmx_paddus_w (operands reinterpreted as MMX).
RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<UShort4>(call);
}

// psubusw via Intrinsic::x86_mmx_psubus_w (operands reinterpreted as MMX).
RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<UShort4>(call);
}

// paddsb via Intrinsic::x86_mmx_padds_b (operands reinterpreted as MMX).
RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<SByte8>(call);
}

// psubsb via Intrinsic::x86_mmx_psubs_b (operands reinterpreted as MMX).
RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<SByte8>(call);
}

// paddusb via Intrinsic::x86_mmx_paddus_b (operands reinterpreted as MMX).
RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Byte8>(call);
}

// psubusb via Intrinsic::x86_mmx_psubus_b (operands reinterpreted as MMX).
RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Byte8>(call);
}

// paddw via Intrinsic::x86_mmx_padd_w (operands reinterpreted as MMX).
RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// psubw via Intrinsic::x86_mmx_psub_w (operands reinterpreted as MMX).
RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pmullw via Intrinsic::x86_mmx_pmull_w (operands reinterpreted as MMX).
RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmull_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pand via Intrinsic::x86_mmx_pand (operands reinterpreted as MMX).
RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pand);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// por via Intrinsic::x86_mmx_por (operands reinterpreted as MMX).
RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_por);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pxor via Intrinsic::x86_mmx_pxor (operands reinterpreted as MMX).
RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pxor);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pshufw via Intrinsic::x86_sse_pshuf_w; y is passed as a constant byte.
RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_pshuf_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, V(Nucleus::createConstantByte(y))));

	return As<Short4>(call);
}

// punpcklwd via Intrinsic::x86_mmx_punpcklwd (operands reinterpreted as MMX).
RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklwd);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Int2>(call);
}

// punpckhwd via Intrinsic::x86_mmx_punpckhwd (operands reinterpreted as MMX).
RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhwd);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Int2>(call);
}

// pinsrw via Intrinsic::x86_mmx_pinsr_w; i is passed as a constant int.
RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pinsr_w);
	Value *call = V(::builder->CreateCall3(intrinsic, As<MMX>(x).value, y.value, V(Nucleus::createConstantInt(i))));

	return As<Short4>(call);
}

// pextrw via Intrinsic::x86_mmx_pextr_w; i is passed as a constant int.
RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pextr_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, V(Nucleus::createConstantInt(i))));

	return RValue<Int>(call);
}

// punpckldq via Intrinsic::x86_mmx_punpckldq (operands reinterpreted as MMX).
RValue<Long1> punpckldq(RValue<Int2> x, RValue<Int2> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckldq);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Long1>(call);
}

// punpckhdq via Intrinsic::x86_mmx_punpckhdq (operands reinterpreted as MMX).
RValue<Long1> punpckhdq(RValue<Int2> x, RValue<Int2> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhdq);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Long1>(call);
}

// punpcklbw via Intrinsic::x86_mmx_punpcklbw (operands reinterpreted as MMX).
RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklbw);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// punpckhbw via Intrinsic::x86_mmx_punpckhbw (operands reinterpreted as MMX).
RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhbw);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// paddb via Intrinsic::x86_mmx_padd_b (operands reinterpreted as MMX).
RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Byte8>(call);
}

// psubb via Intrinsic::x86_mmx_psub_b (operands reinterpreted as MMX).
RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Byte8>(call);
}

// paddd via Intrinsic::x86_mmx_padd_d (operands reinterpreted as MMX).
RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_d);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Int2>(call);
}

// psubd via Intrinsic::x86_mmx_psub_d (operands reinterpreted as MMX).
RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_d);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Int2>(call);
}

// pavgw via Intrinsic::x86_mmx_pavg_w (operands reinterpreted as MMX).
RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pavg_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<UShort4>(call);
}

// pmaxsw via Intrinsic::x86_mmx_pmaxs_w (operands reinterpreted as MMX).
RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmaxs_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pminsw via Intrinsic::x86_mmx_pmins_w (operands reinterpreted as MMX).
RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmins_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pcmpgtw via Intrinsic::x86_mmx_pcmpgt_w (operands reinterpreted as MMX).
RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pcmpeqw via Intrinsic::x86_mmx_pcmpeq_w (operands reinterpreted as MMX).
RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// pcmpgtb via Intrinsic::x86_mmx_pcmpgt_b (operands reinterpreted as MMX).
RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Byte8>(call);
}

// pcmpeqb via Intrinsic::x86_mmx_pcmpeq_b (operands reinterpreted as MMX).
RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_b);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Byte8>(call);
}

// 64-bit packssdw via Intrinsic::x86_mmx_packssdw (operands reinterpreted as MMX).
RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packssdw);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Short4>(call);
}

// 128-bit packssdw via Intrinsic::x86_sse2_packssdw_128 when SSE2 is available;
// otherwise each operand is split into halves and packed with the 64-bit variant.
RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
{
	if(CPUID::supportsSSE2())
	{
		llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_packssdw_128);
		Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

		return RValue<Short8>(call);
	}

	Int2 xLow = Int2(x);
	Int2 xHigh = Int2(Swizzle(x, 0xEE));

	Int2 yLow = Int2(y);
	Int2 yHigh = Int2(Swizzle(y, 0xEE));

	Short4 packedX = x86::packssdw(xLow, xHigh);
	Short4 packedY = x86::packssdw(yLow, yHigh);

	return Short8(packedX, packedY);
}

// packsswb via Intrinsic::x86_mmx_packsswb (operands reinterpreted as MMX).
RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packsswb);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<SByte8>(call);
}

// packuswb via Intrinsic::x86_mmx_packuswb (operands reinterpreted as MMX).
RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packuswb);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, As<MMX>(y).value));

	return As<Byte8>(call);
}

// packusdw via Intrinsic::x86_sse41_packusdw when SSE4.1 is available; otherwise
// emulated by biasing the inputs by 0x8000, packing with packssdw and adding
// 0x8000 back to every 16-bit element.
RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
{
	if(CPUID::supportsSSE4_1())
	{
		llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_packusdw);
		Value *call = V(::builder->CreateCall2(intrinsic, x.value, y.value));

		return RValue<UShort8>(call);
	}

	// FIXME: Not an exact replacement!
	RValue<Int4> biasedX = As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000));
	RValue<Int4> biasedY = As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000));

	return As<UShort8>(packssdw(biasedX, biasedY) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
}

// 64-bit psrlw by a constant count, via Intrinsic::x86_mmx_psrli_w.
RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, V(Nucleus::createConstantInt(y))));

	return As<UShort4>(call);
}

// 128-bit psrlw by a constant count, via Intrinsic::x86_sse2_psrli_w.
RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, V(Nucleus::createConstantInt(y))));

	return RValue<UShort8>(call);
}

// 64-bit psraw by a constant count, via Intrinsic::x86_mmx_psrai_w.
RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, V(Nucleus::createConstantInt(y))));

	return As<Short4>(call);
}

// 128-bit psraw by a constant count, via Intrinsic::x86_sse2_psrai_w.
RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, V(Nucleus::createConstantInt(y))));

	return RValue<Short8>(call);
}

// 64-bit psllw by a constant count, via Intrinsic::x86_mmx_pslli_w.
RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_w);
	Value *call = V(::builder->CreateCall2(intrinsic, As<MMX>(x).value, V(Nucleus::createConstantInt(y))));

	return As<Short4>(call);
}

// 128-bit psllw by a constant count, via Intrinsic::x86_sse2_pslli_w.
RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
{
	llvm::Function *intrinsic = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_w);
	Value *call = V(::builder->CreateCall2(intrinsic, x.value, V(Nucleus::createConstantInt(y))));

	return RValue<Short8>(call);
}
7336 7337 RValue<Int2> pslld(RValue<Int2> x, unsigned char y) 7338 { 7339 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_d); 7340 7341 return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); 7342 } 7343 7344 RValue<Int4> pslld(RValue<Int4> x, unsigned char y) 7345 { 7346 if(CPUID::supportsSSE2()) 7347 { 7348 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_d); 7349 7350 return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y))))); 7351 } 7352 else 7353 { 7354 Int2 lo = Int2(x); 7355 Int2 hi = Int2(Swizzle(x, 0xEE)); 7356 7357 lo = x86::pslld(lo, y); 7358 hi = x86::pslld(hi, y); 7359 7360 return Int4(lo, hi); 7361 } 7362 } 7363 7364 RValue<Int2> psrad(RValue<Int2> x, unsigned char y) 7365 { 7366 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_d); 7367 7368 return As<Int2>(V(::builder->CreateCall2(psrad, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); 7369 } 7370 7371 RValue<Int4> psrad(RValue<Int4> x, unsigned char y) 7372 { 7373 if(CPUID::supportsSSE2()) 7374 { 7375 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_d); 7376 7377 return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y))))); 7378 } 7379 else 7380 { 7381 Int2 lo = Int2(x); 7382 Int2 hi = Int2(Swizzle(x, 0xEE)); 7383 7384 lo = x86::psrad(lo, y); 7385 hi = x86::psrad(hi, y); 7386 7387 return Int4(lo, hi); 7388 } 7389 } 7390 7391 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y) 7392 { 7393 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_d); 7394 7395 return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))); 7396 } 7397 7398 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y) 7399 { 7400 if(CPUID::supportsSSE2()) 7401 { 7402 llvm::Function *psrld = 
Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_d); 7403 7404 return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y))))); 7405 } 7406 else 7407 { 7408 UInt2 lo = As<UInt2>(Int2(As<Int4>(x))); 7409 UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE))); 7410 7411 lo = x86::psrld(lo, y); 7412 hi = x86::psrld(hi, y); 7413 7414 return UInt4(lo, hi); 7415 } 7416 } 7417 7418 RValue<UShort4> psrlw(RValue<UShort4> x, RValue<Long1> y) 7419 { 7420 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrl_w); 7421 7422 return As<UShort4>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, As<MMX>(y).value))); 7423 } 7424 7425 RValue<Short4> psraw(RValue<Short4> x, RValue<Long1> y) 7426 { 7427 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psra_w); 7428 7429 return As<Short4>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, As<MMX>(y).value))); 7430 } 7431 7432 RValue<Short4> psllw(RValue<Short4> x, RValue<Long1> y) 7433 { 7434 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psll_w); 7435 7436 return As<Short4>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, As<MMX>(y).value))); 7437 } 7438 7439 RValue<Int2> pslld(RValue<Int2> x, RValue<Long1> y) 7440 { 7441 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psll_d); 7442 7443 return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, As<MMX>(y).value))); 7444 } 7445 7446 RValue<UInt2> psrld(RValue<UInt2> x, RValue<Long1> y) 7447 { 7448 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrl_d); 7449 7450 return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, As<MMX>(y).value))); 7451 } 7452 7453 RValue<Int2> psrad(RValue<Int2> x, RValue<Long1> y) 7454 { 7455 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psra_d); 7456 7457 return As<Int2>(V(::builder->CreateCall2(psrld, 
As<MMX>(x).value, As<MMX>(y).value))); 7458 } 7459 7460 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y) 7461 { 7462 llvm::Function *pmaxsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxsd); 7463 7464 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value))); 7465 } 7466 7467 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y) 7468 { 7469 llvm::Function *pminsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminsd); 7470 7471 return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value))); 7472 } 7473 7474 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y) 7475 { 7476 llvm::Function *pmaxud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxud); 7477 7478 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value))); 7479 } 7480 7481 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y) 7482 { 7483 llvm::Function *pminud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminud); 7484 7485 return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value))); 7486 } 7487 7488 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y) 7489 { 7490 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulh_w); 7491 7492 return As<Short4>(V(::builder->CreateCall2(pmulhw, As<MMX>(x).value, As<MMX>(y).value))); 7493 } 7494 7495 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y) 7496 { 7497 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulhu_w); 7498 7499 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, As<MMX>(x).value, As<MMX>(y).value))); 7500 } 7501 7502 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y) 7503 { 7504 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmadd_wd); 7505 7506 return As<Int2>(V(::builder->CreateCall2(pmaddwd, As<MMX>(x).value, As<MMX>(y).value))); 7507 } 7508 7509 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y) 7510 { 7511 
llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulh_w); 7512 7513 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value))); 7514 } 7515 7516 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y) 7517 { 7518 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulhu_w); 7519 7520 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value))); 7521 } 7522 7523 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y) 7524 { 7525 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmadd_wd); 7526 7527 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value))); 7528 } 7529 7530 RValue<Int> movmskps(RValue<Float4> x) 7531 { 7532 llvm::Function *movmskps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_movmsk_ps); 7533 7534 return RValue<Int>(V(::builder->CreateCall(movmskps, x.value))); 7535 } 7536 7537 RValue<Int> pmovmskb(RValue<Byte8> x) 7538 { 7539 llvm::Function *pmovmskb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmovmskb); 7540 7541 return RValue<Int>(V(::builder->CreateCall(pmovmskb, As<MMX>(x).value))); 7542 } 7543 7544 //RValue<Int2> movd(RValue<Pointer<Int>> x) 7545 //{ 7546 // Value *element = Nucleus::createLoad(x.value); 7547 7548 //// Value *int2 = UndefValue::get(Int2::getType()); 7549 //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0)); 7550 7551 // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType()); 7552 7553 // return RValue<Int2>(int2); 7554 //} 7555 7556 //RValue<Int2> movdq2q(RValue<Int4> x) 7557 //{ 7558 // Value *long2 = Nucleus::createBitCast(x.value, T(VectorType::get(Long::getType(), 2))); 7559 // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0)); 7560 7561 // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType())); 7562 //} 7563 
7564 RValue<Int4> pmovzxbd(RValue<Int4> x) 7565 { 7566 llvm::Function *pmovzxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxbd); 7567 7568 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType())))); 7569 } 7570 7571 RValue<Int4> pmovsxbd(RValue<Int4> x) 7572 { 7573 llvm::Function *pmovsxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxbd); 7574 7575 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType())))); 7576 } 7577 7578 RValue<Int4> pmovzxwd(RValue<Int4> x) 7579 { 7580 llvm::Function *pmovzxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxwd); 7581 7582 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType())))); 7583 } 7584 7585 RValue<Int4> pmovsxwd(RValue<Int4> x) 7586 { 7587 llvm::Function *pmovsxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxwd); 7588 7589 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType())))); 7590 } 7591 7592 void emms() 7593 { 7594 llvm::Function *emms = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_emms); 7595 7596 V(::builder->CreateCall(emms)); 7597 } 7598 } 7599} 7600