1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Reactor.hpp"
16
17#include "llvm/Support/IRBuilder.h"
18#include "llvm/Function.h"
19#include "llvm/GlobalVariable.h"
20#include "llvm/Module.h"
21#include "llvm/LLVMContext.h"
22#include "llvm/Constants.h"
23#include "llvm/Intrinsics.h"
24#include "llvm/PassManager.h"
25#include "llvm/Analysis/LoopPass.h"
26#include "llvm/Transforms/Scalar.h"
27#include "llvm/Target/TargetData.h"
28#include "llvm/Target/TargetOptions.h"
29#include "llvm/Support/TargetSelect.h"
30#include "../lib/ExecutionEngine/JIT/JIT.h"
31
32#include "LLVMRoutine.hpp"
33#include "LLVMRoutineManager.hpp"
34#include "x86.hpp"
35#include "Common/CPUID.hpp"
36#include "Common/Thread.hpp"
37#include "Common/Memory.hpp"
38#include "Common/MutexLock.hpp"
39
40#include <fstream>
41
42#if defined(__i386__) || defined(__x86_64__)
43#include <xmmintrin.h>
44#endif
45
#if defined(__x86_64__) && defined(_WIN32)
// C-linkage stub that is only compiled for 64-bit Windows builds.
// It is intentionally not implemented: reaching it trips the assert.
extern "C" void X86CompilationCallback()
{
	// UNIMPLEMENTED
	assert(false);
}
#endif
52
extern "C"
{
	// Optional CodeAnalyst JIT notification entry points. They start out null; on Windows
	// the Nucleus constructor looks them up in CAJitNtfyLib.dll when that library loads.
	bool (*CodeAnalystInitialize)() = nullptr;
	void (*CodeAnalystCompleteJITLog)() = nullptr;
	bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = nullptr;
}
59
// Declaration of an LLVM global flag; the Nucleus constructor sets it to false.
namespace llvm
{
	extern bool JITEmitDebugInfo;
}
64
65namespace
66{
67	sw::LLVMRoutineManager *routineManager = nullptr;
68	llvm::ExecutionEngine *executionEngine = nullptr;
69	llvm::IRBuilder<> *builder = nullptr;
70	llvm::LLVMContext *context = nullptr;
71	llvm::Module *module = nullptr;
72	llvm::Function *function = nullptr;
73
74	sw::MutexLock codegenMutex;
75}
76
77namespace sw
78{
79	Optimization optimization[10] = {InstructionCombining, Disabled};
80
81	enum EmulatedType
82	{
83		Type_v2i32,
84		Type_v4i16,
85		Type_v2i16,
86		Type_v8i8,
87		Type_v4i8,
88		Type_v2f32,
89		EmulatedTypeCount
90	};
91
92	class Value : public llvm::Value {};
93	class SwitchCases : public llvm::SwitchInst {};
94	class BasicBlock : public llvm::BasicBlock {};
95
96	llvm::Type *T(Type *t)
97	{
98		uintptr_t type = reinterpret_cast<uintptr_t>(t);
99		if(type < EmulatedTypeCount)
100		{
101			// Use 128-bit vectors to implement logically shorter ones.
102			switch(type)
103			{
104			case Type_v2i32: return T(Int4::getType());
105			case Type_v4i16: return T(Short8::getType());
106			case Type_v2i16: return T(Short8::getType());
107			case Type_v8i8:  return T(Byte16::getType());
108			case Type_v4i8:  return T(Byte16::getType());
109			case Type_v2f32: return T(Float4::getType());
110			default: assert(false);
111			}
112		}
113
114		return reinterpret_cast<llvm::Type*>(t);
115	}
116
117	inline Type *T(llvm::Type *t)
118	{
119		return reinterpret_cast<Type*>(t);
120	}
121
122	Type *T(EmulatedType t)
123	{
124		return reinterpret_cast<Type*>(t);
125	}
126
127	inline Value *V(llvm::Value *t)
128	{
129		return reinterpret_cast<Value*>(t);
130	}
131
132	inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
133	{
134		return reinterpret_cast<std::vector<llvm::Type*>&>(t);
135	}
136
137	inline BasicBlock *B(llvm::BasicBlock *t)
138	{
139		return reinterpret_cast<BasicBlock*>(t);
140	}
141
142	static size_t typeSize(Type *type)
143	{
144		uintptr_t t = reinterpret_cast<uintptr_t>(type);
145		if(t < EmulatedTypeCount)
146		{
147			switch(t)
148			{
149			case Type_v2i32: return 8;
150			case Type_v4i16: return 8;
151			case Type_v2i16: return 4;
152			case Type_v8i8:  return 8;
153			case Type_v4i8:  return 4;
154			case Type_v2f32: return 8;
155			default: assert(false);
156			}
157		}
158
159		return T(type)->getPrimitiveSizeInBits() / 8;
160	}
161
162	static unsigned int elementCount(Type *type)
163	{
164		uintptr_t t = reinterpret_cast<uintptr_t>(type);
165		if(t < EmulatedTypeCount)
166		{
167			switch(t)
168			{
169			case Type_v2i32: return 2;
170			case Type_v4i16: return 4;
171			case Type_v2i16: return 2;
172			case Type_v8i8:  return 8;
173			case Type_v4i8:  return 4;
174			case Type_v2f32: return 2;
175			default: assert(false);
176			}
177		}
178
179		return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
180	}
181
182	Nucleus::Nucleus()
183	{
184		::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe
185
186		llvm::InitializeNativeTarget();
187		llvm::JITEmitDebugInfo = false;
188
189		if(!::context)
190		{
191			::context = new llvm::LLVMContext();
192		}
193
194		::module = new llvm::Module("", *::context);
195		::routineManager = new LLVMRoutineManager();
196
197		#if defined(__x86_64__)
198			const char *architecture = "x86-64";
199		#else
200			const char *architecture = "x86";
201		#endif
202
203		llvm::SmallVector<std::string, 1> MAttrs;
204		MAttrs.push_back(CPUID::supportsMMX()    ? "+mmx"   : "-mmx");
205		MAttrs.push_back(CPUID::supportsCMOV()   ? "+cmov"  : "-cmov");
206		MAttrs.push_back(CPUID::supportsSSE()    ? "+sse"   : "-sse");
207		MAttrs.push_back(CPUID::supportsSSE2()   ? "+sse2"  : "-sse2");
208		MAttrs.push_back(CPUID::supportsSSE3()   ? "+sse3"  : "-sse3");
209		MAttrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3" : "-ssse3");
210		MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
211
212		std::string error;
213		llvm::TargetMachine *targetMachine = llvm::EngineBuilder::selectTarget(::module, architecture, "", MAttrs, llvm::Reloc::Default, llvm::CodeModel::JITDefault, &error);
214		::executionEngine = llvm::JIT::createJIT(::module, 0, ::routineManager, llvm::CodeGenOpt::Aggressive, true, targetMachine);
215
216		if(!::builder)
217		{
218			::builder = new llvm::IRBuilder<>(*::context);
219
220			#if defined(_WIN32)
221				HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
222				if(CodeAnalyst)
223				{
224					CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
225					CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
226					CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
227
228					CodeAnalystInitialize();
229				}
230			#endif
231		}
232	}
233
234	Nucleus::~Nucleus()
235	{
236		delete ::executionEngine;
237		::executionEngine = nullptr;
238
239		::routineManager = nullptr;
240		::function = nullptr;
241		::module = nullptr;
242
243		::codegenMutex.unlock();
244	}
245
246	Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
247	{
248		if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
249		{
250			llvm::Type *type = ::function->getReturnType();
251
252			if(type->isVoidTy())
253			{
254				createRetVoid();
255			}
256			else
257			{
258				createRet(V(llvm::UndefValue::get(type)));
259			}
260		}
261
262		if(false)
263		{
264			std::string error;
265			llvm::raw_fd_ostream file("llvm-dump-unopt.txt", error);
266			::module->print(file, 0);
267		}
268
269		if(runOptimizations)
270		{
271			optimize();
272		}
273
274		if(false)
275		{
276			std::string error;
277			llvm::raw_fd_ostream file("llvm-dump-opt.txt", error);
278			::module->print(file, 0);
279		}
280
281		void *entry = ::executionEngine->getPointerToFunction(::function);
282		LLVMRoutine *routine = ::routineManager->acquireRoutine(entry);
283
284		if(CodeAnalystLogJITCode)
285		{
286			CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
287		}
288
289		return routine;
290	}
291
292	void Nucleus::optimize()
293	{
294		static llvm::PassManager *passManager = nullptr;
295
296		if(!passManager)
297		{
298			passManager = new llvm::PassManager();
299
300			llvm::UnsafeFPMath = true;
301		//	llvm::NoInfsFPMath = true;
302		//	llvm::NoNaNsFPMath = true;
303
304			passManager->add(new llvm::TargetData(*::executionEngine->getTargetData()));
305			passManager->add(llvm::createScalarReplAggregatesPass());
306
307			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
308			{
309				switch(optimization[pass])
310				{
311				case Disabled:                                                                       break;
312				case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
313				case LICM:                 passManager->add(llvm::createLICMPass());                 break;
314				case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
315				case GVN:                  passManager->add(llvm::createGVNPass());                  break;
316				case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
317				case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
318				case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
319				case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
320				case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
321				default:
322					assert(false);
323				}
324			}
325		}
326
327		passManager->run(*::module);
328	}
329
330	Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
331	{
332		// Need to allocate it in the entry block for mem2reg to work
333		llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
334
335		llvm::Instruction *declaration;
336
337		if(arraySize)
338		{
339			declaration = new llvm::AllocaInst(T(type), Nucleus::createConstantInt(arraySize));
340		}
341		else
342		{
343			declaration = new llvm::AllocaInst(T(type), (Value*)nullptr);
344		}
345
346		entryBlock.getInstList().push_front(declaration);
347
348		return V(declaration);
349	}
350
351	BasicBlock *Nucleus::createBasicBlock()
352	{
353		return B(BasicBlock::Create(*::context, "", ::function));
354	}
355
356	BasicBlock *Nucleus::getInsertBlock()
357	{
358		return B(::builder->GetInsertBlock());
359	}
360
361	void Nucleus::setInsertBlock(BasicBlock *basicBlock)
362	{
363	//	assert(::builder->GetInsertBlock()->back().isTerminator());
364		return ::builder->SetInsertPoint(basicBlock);
365	}
366
367	void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
368	{
369		llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
370		::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
371		::function->setCallingConv(llvm::CallingConv::C);
372
373		::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function));
374	}
375
376	Value *Nucleus::getArgument(unsigned int index)
377	{
378		llvm::Function::arg_iterator args = ::function->arg_begin();
379
380		while(index)
381		{
382			args++;
383			index--;
384		}
385
386		return V(&*args);
387	}
388
389	void Nucleus::createRetVoid()
390	{
391		::builder->CreateRetVoid();
392	}
393
394	void Nucleus::createRet(Value *v)
395	{
396		::builder->CreateRet(v);
397	}
398
399	void Nucleus::createBr(BasicBlock *dest)
400	{
401		::builder->CreateBr(dest);
402	}
403
404	void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
405	{
406		::builder->CreateCondBr(cond, ifTrue, ifFalse);
407	}
408
409	Value *Nucleus::createAdd(Value *lhs, Value *rhs)
410	{
411		return V(::builder->CreateAdd(lhs, rhs));
412	}
413
414	Value *Nucleus::createSub(Value *lhs, Value *rhs)
415	{
416		return V(::builder->CreateSub(lhs, rhs));
417	}
418
419	Value *Nucleus::createMul(Value *lhs, Value *rhs)
420	{
421		return V(::builder->CreateMul(lhs, rhs));
422	}
423
424	Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
425	{
426		return V(::builder->CreateUDiv(lhs, rhs));
427	}
428
429	Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
430	{
431		return V(::builder->CreateSDiv(lhs, rhs));
432	}
433
434	Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
435	{
436		return V(::builder->CreateFAdd(lhs, rhs));
437	}
438
439	Value *Nucleus::createFSub(Value *lhs, Value *rhs)
440	{
441		return V(::builder->CreateFSub(lhs, rhs));
442	}
443
444	Value *Nucleus::createFMul(Value *lhs, Value *rhs)
445	{
446		return V(::builder->CreateFMul(lhs, rhs));
447	}
448
449	Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
450	{
451		return V(::builder->CreateFDiv(lhs, rhs));
452	}
453
454	Value *Nucleus::createURem(Value *lhs, Value *rhs)
455	{
456		return V(::builder->CreateURem(lhs, rhs));
457	}
458
459	Value *Nucleus::createSRem(Value *lhs, Value *rhs)
460	{
461		return V(::builder->CreateSRem(lhs, rhs));
462	}
463
464	Value *Nucleus::createFRem(Value *lhs, Value *rhs)
465	{
466		return V(::builder->CreateFRem(lhs, rhs));
467	}
468
469	Value *Nucleus::createShl(Value *lhs, Value *rhs)
470	{
471		return V(::builder->CreateShl(lhs, rhs));
472	}
473
474	Value *Nucleus::createLShr(Value *lhs, Value *rhs)
475	{
476		return V(::builder->CreateLShr(lhs, rhs));
477	}
478
479	Value *Nucleus::createAShr(Value *lhs, Value *rhs)
480	{
481		return V(::builder->CreateAShr(lhs, rhs));
482	}
483
484	Value *Nucleus::createAnd(Value *lhs, Value *rhs)
485	{
486		return V(::builder->CreateAnd(lhs, rhs));
487	}
488
489	Value *Nucleus::createOr(Value *lhs, Value *rhs)
490	{
491		return V(::builder->CreateOr(lhs, rhs));
492	}
493
494	Value *Nucleus::createXor(Value *lhs, Value *rhs)
495	{
496		return V(::builder->CreateXor(lhs, rhs));
497	}
498
499	Value *Nucleus::createNeg(Value *v)
500	{
501		return V(::builder->CreateNeg(v));
502	}
503
504	Value *Nucleus::createFNeg(Value *v)
505	{
506		return V(::builder->CreateFNeg(v));
507	}
508
509	Value *Nucleus::createNot(Value *v)
510	{
511		return V(::builder->CreateNot(v));
512	}
513
514	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
515	{
516		uintptr_t t = reinterpret_cast<uintptr_t>(type);
517		if(t < EmulatedTypeCount)
518		{
519			switch(t)
520			{
521			case Type_v2i32:
522			case Type_v4i16:
523			case Type_v8i8:
524			case Type_v2f32:
525				return createBitCast(createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 0), T(T(type)));
526			case Type_v2i16:
527			case Type_v4i8:
528				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
529				{
530					Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
531					Value *i = V(createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment));
532					i = createZExt(i, Long::getType());
533					Value *v = V(createInsertElement(u, i, 0));
534					return createBitCast(v, T(T(type)));
535				}
536				break;
537			default:
538				assert(false);
539			}
540		}
541
542		assert(ptr->getType()->getContainedType(0) == T(type));
543		return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, alignment)));
544	}
545
546	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
547	{
548		uintptr_t t = reinterpret_cast<uintptr_t>(type);
549		if(t < EmulatedTypeCount)
550		{
551			switch(t)
552			{
553			case Type_v2i32:
554			case Type_v4i16:
555			case Type_v8i8:
556			case Type_v2f32:
557				createStore(createExtractElement(createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment);
558				return value;
559			case Type_v2i16:
560			case Type_v4i8:
561				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
562				{
563					createStore(createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
564					return value;
565				}
566				break;
567			default:
568				assert(false);
569			}
570		}
571
572		assert(ptr->getType()->getContainedType(0) == T(type));
573		::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, alignment));
574		return value;
575	}
576
577	Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
578	{
579		if(sizeof(void*) == 8)
580		{
581			if(unsignedIndex)
582			{
583				index = createZExt(index, Long::getType());
584			}
585			else
586			{
587				index = createSExt(index, Long::getType());
588			}
589
590			index = createMul(index, createConstantLong((int64_t)typeSize(type)));
591		}
592		else
593		{
594			index = createMul(index, createConstantInt((int)typeSize(type)));
595		}
596
597		assert(ptr->getType()->getContainedType(0) == T(type));
598		return createBitCast(V(::builder->CreateGEP(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0))), index)), T(llvm::PointerType::get(T(type), 0)));
599	}
600
601	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
602	{
603		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr, value, llvm::SequentiallyConsistent));
604	}
605
606	Value *Nucleus::createTrunc(Value *v, Type *destType)
607	{
608		return V(::builder->CreateTrunc(v, T(destType)));
609	}
610
611	Value *Nucleus::createZExt(Value *v, Type *destType)
612	{
613		return V(::builder->CreateZExt(v, T(destType)));
614	}
615
616	Value *Nucleus::createSExt(Value *v, Type *destType)
617	{
618		return V(::builder->CreateSExt(v, T(destType)));
619	}
620
621	Value *Nucleus::createFPToSI(Value *v, Type *destType)
622	{
623		return V(::builder->CreateFPToSI(v, T(destType)));
624	}
625
626	Value *Nucleus::createSIToFP(Value *v, Type *destType)
627	{
628		return V(::builder->CreateSIToFP(v, T(destType)));
629	}
630
631	Value *Nucleus::createFPTrunc(Value *v, Type *destType)
632	{
633		return V(::builder->CreateFPTrunc(v, T(destType)));
634	}
635
636	Value *Nucleus::createFPExt(Value *v, Type *destType)
637	{
638		return V(::builder->CreateFPExt(v, T(destType)));
639	}
640
641	Value *Nucleus::createBitCast(Value *v, Type *destType)
642	{
643		// Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
644		// support for casting between scalars and wide vectors. Emulate them by writing to the stack and
645		// reading back as the destination type.
646		if(!v->getType()->isVectorTy() && T(destType)->isVectorTy())
647		{
648			Value *readAddress = allocateStackVariable(destType);
649			Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(v->getType(), 0)));
650			createStore(v, writeAddress, T(v->getType()));
651			return createLoad(readAddress, destType);
652		}
653		else if(v->getType()->isVectorTy() && !T(destType)->isVectorTy())
654		{
655			Value *writeAddress = allocateStackVariable(T(v->getType()));
656			createStore(v, writeAddress, T(v->getType()));
657			Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
658			return createLoad(readAddress, destType);
659		}
660
661		return V(::builder->CreateBitCast(v, T(destType)));
662	}
663
664	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
665	{
666		return V(::builder->CreateICmpEQ(lhs, rhs));
667	}
668
669	Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
670	{
671		return V(::builder->CreateICmpNE(lhs, rhs));
672	}
673
674	Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
675	{
676		return V(::builder->CreateICmpUGT(lhs, rhs));
677	}
678
679	Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
680	{
681		return V(::builder->CreateICmpUGE(lhs, rhs));
682	}
683
684	Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
685	{
686		return V(::builder->CreateICmpULT(lhs, rhs));
687	}
688
689	Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
690	{
691		return V(::builder->CreateICmpULE(lhs, rhs));
692	}
693
694	Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
695	{
696		return V(::builder->CreateICmpSGT(lhs, rhs));
697	}
698
699	Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
700	{
701		return V(::builder->CreateICmpSGE(lhs, rhs));
702	}
703
704	Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
705	{
706		return V(::builder->CreateICmpSLT(lhs, rhs));
707	}
708
709	Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
710	{
711		return V(::builder->CreateICmpSLE(lhs, rhs));
712	}
713
714	Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
715	{
716		return V(::builder->CreateFCmpOEQ(lhs, rhs));
717	}
718
719	Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
720	{
721		return V(::builder->CreateFCmpOGT(lhs, rhs));
722	}
723
724	Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
725	{
726		return V(::builder->CreateFCmpOGE(lhs, rhs));
727	}
728
729	Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
730	{
731		return V(::builder->CreateFCmpOLT(lhs, rhs));
732	}
733
734	Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
735	{
736		return V(::builder->CreateFCmpOLE(lhs, rhs));
737	}
738
739	Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
740	{
741		return V(::builder->CreateFCmpONE(lhs, rhs));
742	}
743
744	Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
745	{
746		return V(::builder->CreateFCmpORD(lhs, rhs));
747	}
748
749	Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
750	{
751		return V(::builder->CreateFCmpUNO(lhs, rhs));
752	}
753
754	Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
755	{
756		return V(::builder->CreateFCmpUEQ(lhs, rhs));
757	}
758
759	Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
760	{
761		return V(::builder->CreateFCmpUGT(lhs, rhs));
762	}
763
764	Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
765	{
766		return V(::builder->CreateFCmpUGE(lhs, rhs));
767	}
768
769	Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
770	{
771		return V(::builder->CreateFCmpULT(lhs, rhs));
772	}
773
774	Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
775	{
776		return V(::builder->CreateFCmpULE(lhs, rhs));
777	}
778
779	Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
780	{
781		return V(::builder->CreateFCmpULE(lhs, rhs));
782	}
783
784	Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
785	{
786		assert(vector->getType()->getContainedType(0) == T(type));
787		return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
788	}
789
790	Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
791	{
792		return V(::builder->CreateInsertElement(vector, element, createConstantInt(index)));
793	}
794
795	Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
796	{
797		int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements();
798		const int maxSize = 16;
799		llvm::Constant *swizzle[maxSize];
800		assert(size <= maxSize);
801
802		for(int i = 0; i < size; i++)
803		{
804			swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
805		}
806
807		llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
808
809		return V(::builder->CreateShuffleVector(V1, V2, shuffle));
810	}
811
812	Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
813	{
814		return V(::builder->CreateSelect(C, ifTrue, ifFalse));
815	}
816
817	SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
818	{
819		return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases));
820	}
821
822	void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
823	{
824		switchCases->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), branch);
825	}
826
827	void Nucleus::createUnreachable()
828	{
829		::builder->CreateUnreachable();
830	}
831
832	static Value *createSwizzle4(Value *val, unsigned char select)
833	{
834		int swizzle[4] =
835		{
836			(select >> 0) & 0x03,
837			(select >> 2) & 0x03,
838			(select >> 4) & 0x03,
839			(select >> 6) & 0x03,
840		};
841
842		return Nucleus::createShuffleVector(val, val, swizzle);
843	}
844
845	static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
846	{
847		bool mask[4] = {false, false, false, false};
848
849		mask[(select >> 0) & 0x03] = true;
850		mask[(select >> 2) & 0x03] = true;
851		mask[(select >> 4) & 0x03] = true;
852		mask[(select >> 6) & 0x03] = true;
853
854		int swizzle[4] =
855		{
856			mask[0] ? 4 : 0,
857			mask[1] ? 5 : 1,
858			mask[2] ? 6 : 2,
859			mask[3] ? 7 : 3,
860		};
861
862		return Nucleus::createShuffleVector(lhs, rhs, swizzle);
863	}
864
865	Type *Nucleus::getPointerType(Type *ElementType)
866	{
867		return T(llvm::PointerType::get(T(ElementType), 0));
868	}
869
870	Value *Nucleus::createNullValue(Type *Ty)
871	{
872		return V(llvm::Constant::getNullValue(T(Ty)));
873	}
874
875	Value *Nucleus::createConstantLong(int64_t i)
876	{
877		return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
878	}
879
880	Value *Nucleus::createConstantInt(int i)
881	{
882		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
883	}
884
885	Value *Nucleus::createConstantInt(unsigned int i)
886	{
887		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
888	}
889
890	Value *Nucleus::createConstantBool(bool b)
891	{
892		return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
893	}
894
895	Value *Nucleus::createConstantByte(signed char i)
896	{
897		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
898	}
899
900	Value *Nucleus::createConstantByte(unsigned char i)
901	{
902		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
903	}
904
905	Value *Nucleus::createConstantShort(short i)
906	{
907		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
908	}
909
910	Value *Nucleus::createConstantShort(unsigned short i)
911	{
912		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
913	}
914
915	Value *Nucleus::createConstantFloat(float x)
916	{
917		return V(llvm::ConstantFP::get(T(Float::getType()), x));
918	}
919
920	Value *Nucleus::createNullPointer(Type *Ty)
921	{
922		return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
923	}
924
925	Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
926	{
927		assert(llvm::isa<llvm::VectorType>(T(type)));
928		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
929		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
930		assert(numElements <= 16 && numConstants <= numElements);
931		llvm::Constant *constantVector[16];
932
933		for(int i = 0; i < numElements; i++)
934		{
935			constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
936		}
937
938		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
939	}
940
941	Value *Nucleus::createConstantVector(const double *constants, Type *type)
942	{
943		assert(llvm::isa<llvm::VectorType>(T(type)));
944		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
945		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
946		assert(numElements <= 8 && numConstants <= numElements);
947		llvm::Constant *constantVector[8];
948
949		for(int i = 0; i < numElements; i++)
950		{
951			constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
952		}
953
954		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
955	}
956
957	Type *Void::getType()
958	{
959		return T(llvm::Type::getVoidTy(*::context));
960	}
961
962	Bool::Bool(Argument<Bool> argument)
963	{
964		storeValue(argument.value);
965	}
966
967	Bool::Bool(bool x)
968	{
969		storeValue(Nucleus::createConstantBool(x));
970	}
971
972	Bool::Bool(RValue<Bool> rhs)
973	{
974		storeValue(rhs.value);
975	}
976
977	Bool::Bool(const Bool &rhs)
978	{
979		Value *value = rhs.loadValue();
980		storeValue(value);
981	}
982
983	Bool::Bool(const Reference<Bool> &rhs)
984	{
985		Value *value = rhs.loadValue();
986		storeValue(value);
987	}
988
989	RValue<Bool> Bool::operator=(RValue<Bool> rhs)
990	{
991		storeValue(rhs.value);
992
993		return rhs;
994	}
995
996	RValue<Bool> Bool::operator=(const Bool &rhs)
997	{
998		Value *value = rhs.loadValue();
999		storeValue(value);
1000
1001		return RValue<Bool>(value);
1002	}
1003
1004	RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1005	{
1006		Value *value = rhs.loadValue();
1007		storeValue(value);
1008
1009		return RValue<Bool>(value);
1010	}
1011
1012	RValue<Bool> operator!(RValue<Bool> val)
1013	{
1014		return RValue<Bool>(Nucleus::createNot(val.value));
1015	}
1016
1017	RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1018	{
1019		return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1020	}
1021
1022	RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1023	{
1024		return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1025	}
1026
1027	Type *Bool::getType()
1028	{
1029		return T(llvm::Type::getInt1Ty(*::context));
1030	}
1031
1032	Byte::Byte(Argument<Byte> argument)
1033	{
1034		storeValue(argument.value);
1035	}
1036
1037	Byte::Byte(RValue<Int> cast)
1038	{
1039		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1040
1041		storeValue(integer);
1042	}
1043
1044	Byte::Byte(RValue<UInt> cast)
1045	{
1046		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1047
1048		storeValue(integer);
1049	}
1050
1051	Byte::Byte(RValue<UShort> cast)
1052	{
1053		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1054
1055		storeValue(integer);
1056	}
1057
1058	Byte::Byte(int x)
1059	{
1060		storeValue(Nucleus::createConstantByte((unsigned char)x));
1061	}
1062
1063	Byte::Byte(unsigned char x)
1064	{
1065		storeValue(Nucleus::createConstantByte(x));
1066	}
1067
1068	Byte::Byte(RValue<Byte> rhs)
1069	{
1070		storeValue(rhs.value);
1071	}
1072
1073	Byte::Byte(const Byte &rhs)
1074	{
1075		Value *value = rhs.loadValue();
1076		storeValue(value);
1077	}
1078
1079	Byte::Byte(const Reference<Byte> &rhs)
1080	{
1081		Value *value = rhs.loadValue();
1082		storeValue(value);
1083	}
1084
1085	RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1086	{
1087		storeValue(rhs.value);
1088
1089		return rhs;
1090	}
1091
1092	RValue<Byte> Byte::operator=(const Byte &rhs)
1093	{
1094		Value *value = rhs.loadValue();
1095		storeValue(value);
1096
1097		return RValue<Byte>(value);
1098	}
1099
1100	RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1101	{
1102		Value *value = rhs.loadValue();
1103		storeValue(value);
1104
1105		return RValue<Byte>(value);
1106	}
1107
1108	RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1109	{
1110		return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1111	}
1112
1113	RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1114	{
1115		return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1116	}
1117
1118	RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1119	{
1120		return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1121	}
1122
1123	RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1124	{
1125		return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1126	}
1127
1128	RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1129	{
1130		return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1131	}
1132
1133	RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1134	{
1135		return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1136	}
1137
1138	RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1139	{
1140		return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1141	}
1142
1143	RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1144	{
1145		return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1146	}
1147
1148	RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1149	{
1150		return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1151	}
1152
1153	RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1154	{
1155		return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1156	}
1157
1158	RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1159	{
1160		return lhs = lhs + rhs;
1161	}
1162
1163	RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1164	{
1165		return lhs = lhs - rhs;
1166	}
1167
1168	RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1169	{
1170		return lhs = lhs * rhs;
1171	}
1172
1173	RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1174	{
1175		return lhs = lhs / rhs;
1176	}
1177
1178	RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1179	{
1180		return lhs = lhs % rhs;
1181	}
1182
1183	RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1184	{
1185		return lhs = lhs & rhs;
1186	}
1187
1188	RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1189	{
1190		return lhs = lhs | rhs;
1191	}
1192
1193	RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1194	{
1195		return lhs = lhs ^ rhs;
1196	}
1197
1198	RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1199	{
1200		return lhs = lhs << rhs;
1201	}
1202
1203	RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1204	{
1205		return lhs = lhs >> rhs;
1206	}
1207
1208	RValue<Byte> operator+(RValue<Byte> val)
1209	{
1210		return val;
1211	}
1212
1213	RValue<Byte> operator-(RValue<Byte> val)
1214	{
1215		return RValue<Byte>(Nucleus::createNeg(val.value));
1216	}
1217
1218	RValue<Byte> operator~(RValue<Byte> val)
1219	{
1220		return RValue<Byte>(Nucleus::createNot(val.value));
1221	}
1222
1223	RValue<Byte> operator++(Byte &val, int)   // Post-increment
1224	{
1225		RValue<Byte> res = val;
1226
1227		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1228		val.storeValue(inc);
1229
1230		return res;
1231	}
1232
1233	const Byte &operator++(Byte &val)   // Pre-increment
1234	{
1235		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1236		val.storeValue(inc);
1237
1238		return val;
1239	}
1240
1241	RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1242	{
1243		RValue<Byte> res = val;
1244
1245		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1246		val.storeValue(inc);
1247
1248		return res;
1249	}
1250
1251	const Byte &operator--(Byte &val)   // Pre-decrement
1252	{
1253		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1254		val.storeValue(inc);
1255
1256		return val;
1257	}
1258
1259	RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1260	{
1261		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1262	}
1263
1264	RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1265	{
1266		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1267	}
1268
1269	RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1270	{
1271		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1272	}
1273
1274	RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1275	{
1276		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1277	}
1278
1279	RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1280	{
1281		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1282	}
1283
1284	RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1285	{
1286		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1287	}
1288
1289	Type *Byte::getType()
1290	{
1291		return T(llvm::Type::getInt8Ty(*::context));
1292	}
1293
1294	SByte::SByte(Argument<SByte> argument)
1295	{
1296		storeValue(argument.value);
1297	}
1298
1299	SByte::SByte(RValue<Int> cast)
1300	{
1301		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1302
1303		storeValue(integer);
1304	}
1305
1306	SByte::SByte(RValue<Short> cast)
1307	{
1308		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1309
1310		storeValue(integer);
1311	}
1312
1313	SByte::SByte(signed char x)
1314	{
1315		storeValue(Nucleus::createConstantByte(x));
1316	}
1317
1318	SByte::SByte(RValue<SByte> rhs)
1319	{
1320		storeValue(rhs.value);
1321	}
1322
1323	SByte::SByte(const SByte &rhs)
1324	{
1325		Value *value = rhs.loadValue();
1326		storeValue(value);
1327	}
1328
1329	SByte::SByte(const Reference<SByte> &rhs)
1330	{
1331		Value *value = rhs.loadValue();
1332		storeValue(value);
1333	}
1334
1335	RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1336	{
1337		storeValue(rhs.value);
1338
1339		return rhs;
1340	}
1341
1342	RValue<SByte> SByte::operator=(const SByte &rhs)
1343	{
1344		Value *value = rhs.loadValue();
1345		storeValue(value);
1346
1347		return RValue<SByte>(value);
1348	}
1349
1350	RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1351	{
1352		Value *value = rhs.loadValue();
1353		storeValue(value);
1354
1355		return RValue<SByte>(value);
1356	}
1357
1358	RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1359	{
1360		return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1361	}
1362
1363	RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1364	{
1365		return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1366	}
1367
1368	RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1369	{
1370		return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1371	}
1372
1373	RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1374	{
1375		return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1376	}
1377
1378	RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1379	{
1380		return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1381	}
1382
1383	RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1384	{
1385		return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1386	}
1387
1388	RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1389	{
1390		return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1391	}
1392
1393	RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1394	{
1395		return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1396	}
1397
1398	RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1399	{
1400		return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1401	}
1402
1403	RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1404	{
1405		return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1406	}
1407
1408	RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1409	{
1410		return lhs = lhs + rhs;
1411	}
1412
1413	RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1414	{
1415		return lhs = lhs - rhs;
1416	}
1417
1418	RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1419	{
1420		return lhs = lhs * rhs;
1421	}
1422
1423	RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1424	{
1425		return lhs = lhs / rhs;
1426	}
1427
1428	RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1429	{
1430		return lhs = lhs % rhs;
1431	}
1432
1433	RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1434	{
1435		return lhs = lhs & rhs;
1436	}
1437
1438	RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1439	{
1440		return lhs = lhs | rhs;
1441	}
1442
1443	RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1444	{
1445		return lhs = lhs ^ rhs;
1446	}
1447
1448	RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1449	{
1450		return lhs = lhs << rhs;
1451	}
1452
1453	RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1454	{
1455		return lhs = lhs >> rhs;
1456	}
1457
1458	RValue<SByte> operator+(RValue<SByte> val)
1459	{
1460		return val;
1461	}
1462
1463	RValue<SByte> operator-(RValue<SByte> val)
1464	{
1465		return RValue<SByte>(Nucleus::createNeg(val.value));
1466	}
1467
1468	RValue<SByte> operator~(RValue<SByte> val)
1469	{
1470		return RValue<SByte>(Nucleus::createNot(val.value));
1471	}
1472
1473	RValue<SByte> operator++(SByte &val, int)   // Post-increment
1474	{
1475		RValue<SByte> res = val;
1476
1477		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1)));
1478		val.storeValue(inc);
1479
1480		return res;
1481	}
1482
1483	const SByte &operator++(SByte &val)   // Pre-increment
1484	{
1485		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1486		val.storeValue(inc);
1487
1488		return val;
1489	}
1490
1491	RValue<SByte> operator--(SByte &val, int)   // Post-decrement
1492	{
1493		RValue<SByte> res = val;
1494
1495		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1)));
1496		val.storeValue(inc);
1497
1498		return res;
1499	}
1500
1501	const SByte &operator--(SByte &val)   // Pre-decrement
1502	{
1503		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1504		val.storeValue(inc);
1505
1506		return val;
1507	}
1508
1509	RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1510	{
1511		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1512	}
1513
1514	RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1515	{
1516		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1517	}
1518
1519	RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1520	{
1521		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1522	}
1523
1524	RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1525	{
1526		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1527	}
1528
1529	RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1530	{
1531		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1532	}
1533
1534	RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1535	{
1536		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1537	}
1538
1539	Type *SByte::getType()
1540	{
1541		return T(llvm::Type::getInt8Ty(*::context));
1542	}
1543
1544	Short::Short(Argument<Short> argument)
1545	{
1546		storeValue(argument.value);
1547	}
1548
1549	Short::Short(RValue<Int> cast)
1550	{
1551		Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1552
1553		storeValue(integer);
1554	}
1555
1556	Short::Short(short x)
1557	{
1558		storeValue(Nucleus::createConstantShort(x));
1559	}
1560
1561	Short::Short(RValue<Short> rhs)
1562	{
1563		storeValue(rhs.value);
1564	}
1565
1566	Short::Short(const Short &rhs)
1567	{
1568		Value *value = rhs.loadValue();
1569		storeValue(value);
1570	}
1571
1572	Short::Short(const Reference<Short> &rhs)
1573	{
1574		Value *value = rhs.loadValue();
1575		storeValue(value);
1576	}
1577
1578	RValue<Short> Short::operator=(RValue<Short> rhs)
1579	{
1580		storeValue(rhs.value);
1581
1582		return rhs;
1583	}
1584
1585	RValue<Short> Short::operator=(const Short &rhs)
1586	{
1587		Value *value = rhs.loadValue();
1588		storeValue(value);
1589
1590		return RValue<Short>(value);
1591	}
1592
1593	RValue<Short> Short::operator=(const Reference<Short> &rhs)
1594	{
1595		Value *value = rhs.loadValue();
1596		storeValue(value);
1597
1598		return RValue<Short>(value);
1599	}
1600
1601	RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1602	{
1603		return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1604	}
1605
1606	RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1607	{
1608		return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1609	}
1610
1611	RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1612	{
1613		return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1614	}
1615
1616	RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1617	{
1618		return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1619	}
1620
1621	RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1622	{
1623		return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1624	}
1625
1626	RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1627	{
1628		return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1629	}
1630
1631	RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1632	{
1633		return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1634	}
1635
1636	RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1637	{
1638		return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1639	}
1640
1641	RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1642	{
1643		return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1644	}
1645
1646	RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1647	{
1648		return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1649	}
1650
1651	RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
1652	{
1653		return lhs = lhs + rhs;
1654	}
1655
1656	RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
1657	{
1658		return lhs = lhs - rhs;
1659	}
1660
1661	RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
1662	{
1663		return lhs = lhs * rhs;
1664	}
1665
1666	RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
1667	{
1668		return lhs = lhs / rhs;
1669	}
1670
1671	RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
1672	{
1673		return lhs = lhs % rhs;
1674	}
1675
1676	RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
1677	{
1678		return lhs = lhs & rhs;
1679	}
1680
1681	RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
1682	{
1683		return lhs = lhs | rhs;
1684	}
1685
1686	RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
1687	{
1688		return lhs = lhs ^ rhs;
1689	}
1690
1691	RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
1692	{
1693		return lhs = lhs << rhs;
1694	}
1695
1696	RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
1697	{
1698		return lhs = lhs >> rhs;
1699	}
1700
1701	RValue<Short> operator+(RValue<Short> val)
1702	{
1703		return val;
1704	}
1705
1706	RValue<Short> operator-(RValue<Short> val)
1707	{
1708		return RValue<Short>(Nucleus::createNeg(val.value));
1709	}
1710
1711	RValue<Short> operator~(RValue<Short> val)
1712	{
1713		return RValue<Short>(Nucleus::createNot(val.value));
1714	}
1715
1716	RValue<Short> operator++(Short &val, int)   // Post-increment
1717	{
1718		RValue<Short> res = val;
1719
1720		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1)));
1721		val.storeValue(inc);
1722
1723		return res;
1724	}
1725
1726	const Short &operator++(Short &val)   // Pre-increment
1727	{
1728		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1729		val.storeValue(inc);
1730
1731		return val;
1732	}
1733
1734	RValue<Short> operator--(Short &val, int)   // Post-decrement
1735	{
1736		RValue<Short> res = val;
1737
1738		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1)));
1739		val.storeValue(inc);
1740
1741		return res;
1742	}
1743
1744	const Short &operator--(Short &val)   // Pre-decrement
1745	{
1746		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1747		val.storeValue(inc);
1748
1749		return val;
1750	}
1751
1752	RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1753	{
1754		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1755	}
1756
1757	RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1758	{
1759		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1760	}
1761
1762	RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1763	{
1764		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1765	}
1766
1767	RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1768	{
1769		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1770	}
1771
1772	RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1773	{
1774		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1775	}
1776
1777	RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1778	{
1779		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1780	}
1781
1782	Type *Short::getType()
1783	{
1784		return T(llvm::Type::getInt16Ty(*::context));
1785	}
1786
1787	UShort::UShort(Argument<UShort> argument)
1788	{
1789		storeValue(argument.value);
1790	}
1791
1792	UShort::UShort(RValue<UInt> cast)
1793	{
1794		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1795
1796		storeValue(integer);
1797	}
1798
1799	UShort::UShort(RValue<Int> cast)
1800	{
1801		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1802
1803		storeValue(integer);
1804	}
1805
1806	UShort::UShort(unsigned short x)
1807	{
1808		storeValue(Nucleus::createConstantShort(x));
1809	}
1810
1811	UShort::UShort(RValue<UShort> rhs)
1812	{
1813		storeValue(rhs.value);
1814	}
1815
1816	UShort::UShort(const UShort &rhs)
1817	{
1818		Value *value = rhs.loadValue();
1819		storeValue(value);
1820	}
1821
1822	UShort::UShort(const Reference<UShort> &rhs)
1823	{
1824		Value *value = rhs.loadValue();
1825		storeValue(value);
1826	}
1827
1828	RValue<UShort> UShort::operator=(RValue<UShort> rhs)
1829	{
1830		storeValue(rhs.value);
1831
1832		return rhs;
1833	}
1834
1835	RValue<UShort> UShort::operator=(const UShort &rhs)
1836	{
1837		Value *value = rhs.loadValue();
1838		storeValue(value);
1839
1840		return RValue<UShort>(value);
1841	}
1842
1843	RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
1844	{
1845		Value *value = rhs.loadValue();
1846		storeValue(value);
1847
1848		return RValue<UShort>(value);
1849	}
1850
1851	RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1852	{
1853		return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1854	}
1855
1856	RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1857	{
1858		return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1859	}
1860
1861	RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1862	{
1863		return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
1864	}
1865
1866	RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1867	{
1868		return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1869	}
1870
1871	RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1872	{
1873		return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1874	}
1875
1876	RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1877	{
1878		return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1879	}
1880
1881	RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1882	{
1883		return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1884	}
1885
1886	RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1887	{
1888		return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1889	}
1890
1891	RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1892	{
1893		return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
1894	}
1895
1896	RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1897	{
1898		return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
1899	}
1900
1901	RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
1902	{
1903		return lhs = lhs + rhs;
1904	}
1905
1906	RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
1907	{
1908		return lhs = lhs - rhs;
1909	}
1910
1911	RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
1912	{
1913		return lhs = lhs * rhs;
1914	}
1915
1916	RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
1917	{
1918		return lhs = lhs / rhs;
1919	}
1920
1921	RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
1922	{
1923		return lhs = lhs % rhs;
1924	}
1925
1926	RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
1927	{
1928		return lhs = lhs & rhs;
1929	}
1930
1931	RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
1932	{
1933		return lhs = lhs | rhs;
1934	}
1935
1936	RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
1937	{
1938		return lhs = lhs ^ rhs;
1939	}
1940
1941	RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
1942	{
1943		return lhs = lhs << rhs;
1944	}
1945
1946	RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
1947	{
1948		return lhs = lhs >> rhs;
1949	}
1950
1951	RValue<UShort> operator+(RValue<UShort> val)
1952	{
1953		return val;
1954	}
1955
1956	RValue<UShort> operator-(RValue<UShort> val)
1957	{
1958		return RValue<UShort>(Nucleus::createNeg(val.value));
1959	}
1960
1961	RValue<UShort> operator~(RValue<UShort> val)
1962	{
1963		return RValue<UShort>(Nucleus::createNot(val.value));
1964	}
1965
1966	RValue<UShort> operator++(UShort &val, int)   // Post-increment
1967	{
1968		RValue<UShort> res = val;
1969
1970		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1971		val.storeValue(inc);
1972
1973		return res;
1974	}
1975
1976	const UShort &operator++(UShort &val)   // Pre-increment
1977	{
1978		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1979		val.storeValue(inc);
1980
1981		return val;
1982	}
1983
1984	RValue<UShort> operator--(UShort &val, int)   // Post-decrement
1985	{
1986		RValue<UShort> res = val;
1987
1988		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1989		val.storeValue(inc);
1990
1991		return res;
1992	}
1993
1994	const UShort &operator--(UShort &val)   // Pre-decrement
1995	{
1996		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1997		val.storeValue(inc);
1998
1999		return val;
2000	}
2001
2002	RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2003	{
2004		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2005	}
2006
2007	RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2008	{
2009		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2010	}
2011
2012	RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2013	{
2014		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2015	}
2016
2017	RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2018	{
2019		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2020	}
2021
2022	RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2023	{
2024		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2025	}
2026
2027	RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2028	{
2029		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2030	}
2031
2032	Type *UShort::getType()
2033	{
2034		return T(llvm::Type::getInt16Ty(*::context));
2035	}
2036
2037	Byte4::Byte4(RValue<Byte8> cast)
2038	{
2039		storeValue(Nucleus::createBitCast(cast.value, getType()));
2040	}
2041
2042	Byte4::Byte4(const Reference<Byte4> &rhs)
2043	{
2044		Value *value = rhs.loadValue();
2045		storeValue(value);
2046	}
2047
2048	Type *Byte4::getType()
2049	{
2050		return T(Type_v4i8);
2051	}
2052
2053	Type *SByte4::getType()
2054	{
2055		return T(Type_v4i8);
2056	}
2057
2058	Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2059	{
2060		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2061		storeValue(Nucleus::createConstantVector(constantVector, getType()));
2062	}
2063
2064	Byte8::Byte8(RValue<Byte8> rhs)
2065	{
2066		storeValue(rhs.value);
2067	}
2068
2069	Byte8::Byte8(const Byte8 &rhs)
2070	{
2071		Value *value = rhs.loadValue();
2072		storeValue(value);
2073	}
2074
2075	Byte8::Byte8(const Reference<Byte8> &rhs)
2076	{
2077		Value *value = rhs.loadValue();
2078		storeValue(value);
2079	}
2080
2081	RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2082	{
2083		storeValue(rhs.value);
2084
2085		return rhs;
2086	}
2087
2088	RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2089	{
2090		Value *value = rhs.loadValue();
2091		storeValue(value);
2092
2093		return RValue<Byte8>(value);
2094	}
2095
2096	RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2097	{
2098		Value *value = rhs.loadValue();
2099		storeValue(value);
2100
2101		return RValue<Byte8>(value);
2102	}
2103
2104	RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2105	{
2106		return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2107	}
2108
2109	RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2110	{
2111		return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2112	}
2113
2114//	RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2115//	{
2116//		return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2117//	}
2118
2119//	RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2120//	{
2121//		return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2122//	}
2123
2124//	RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2125//	{
2126//		return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2127//	}
2128
2129	RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2130	{
2131		return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2132	}
2133
2134	RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2135	{
2136		return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2137	}
2138
2139	RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2140	{
2141		return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2142	}
2143
2144//	RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2145//	{
2146//		return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2147//	}
2148
2149//	RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2150//	{
2151//		return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2152//	}
2153
2154	RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2155	{
2156		return lhs = lhs + rhs;
2157	}
2158
2159	RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2160	{
2161		return lhs = lhs - rhs;
2162	}
2163
2164//	RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2165//	{
2166//		return lhs = lhs * rhs;
2167//	}
2168
2169//	RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2170//	{
2171//		return lhs = lhs / rhs;
2172//	}
2173
2174//	RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2175//	{
2176//		return lhs = lhs % rhs;
2177//	}
2178
2179	RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2180	{
2181		return lhs = lhs & rhs;
2182	}
2183
2184	RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2185	{
2186		return lhs = lhs | rhs;
2187	}
2188
2189	RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2190	{
2191		return lhs = lhs ^ rhs;
2192	}
2193
2194//	RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2195//	{
2196//		return lhs = lhs << rhs;
2197//	}
2198
2199//	RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2200//	{
2201//		return lhs = lhs >> rhs;
2202//	}
2203
2204//	RValue<Byte8> operator+(RValue<Byte8> val)
2205//	{
2206//		return val;
2207//	}
2208
2209//	RValue<Byte8> operator-(RValue<Byte8> val)
2210//	{
2211//		return RValue<Byte8>(Nucleus::createNeg(val.value));
2212//	}
2213
2214	RValue<Byte8> operator~(RValue<Byte8> val)
2215	{
2216		return RValue<Byte8>(Nucleus::createNot(val.value));
2217	}
2218
2219	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2220	{
2221		return x86::paddusb(x, y);
2222	}
2223
2224	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2225	{
2226		return x86::psubusb(x, y);
2227	}
2228
2229	RValue<Short4> Unpack(RValue<Byte4> x)
2230	{
2231		int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2232		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2233	}
2234
2235	RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2236	{
2237		return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2238	}
2239
2240	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2241	{
2242		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2243		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2244	}
2245
2246	RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2247	{
2248		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2249		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2250		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2251	}
2252
2253	RValue<Int> SignMask(RValue<Byte8> x)
2254	{
2255		return x86::pmovmskb(x);
2256	}
2257
2258//	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2259//	{
2260//		return x86::pcmpgtb(x, y);   // FIXME: Signedness
2261//	}
2262
2263	RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2264	{
2265		return x86::pcmpeqb(x, y);
2266	}
2267
2268	Type *Byte8::getType()
2269	{
2270		return T(Type_v8i8);
2271	}
2272
2273	SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2274	{
2275		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2276		Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2277
2278		storeValue(Nucleus::createBitCast(vector, getType()));
2279	}
2280
2281	SByte8::SByte8(RValue<SByte8> rhs)
2282	{
2283		storeValue(rhs.value);
2284	}
2285
2286	SByte8::SByte8(const SByte8 &rhs)
2287	{
2288		Value *value = rhs.loadValue();
2289		storeValue(value);
2290	}
2291
2292	SByte8::SByte8(const Reference<SByte8> &rhs)
2293	{
2294		Value *value = rhs.loadValue();
2295		storeValue(value);
2296	}
2297
2298	RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2299	{
2300		storeValue(rhs.value);
2301
2302		return rhs;
2303	}
2304
2305	RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2306	{
2307		Value *value = rhs.loadValue();
2308		storeValue(value);
2309
2310		return RValue<SByte8>(value);
2311	}
2312
2313	RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2314	{
2315		Value *value = rhs.loadValue();
2316		storeValue(value);
2317
2318		return RValue<SByte8>(value);
2319	}
2320
2321	RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2322	{
2323		return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2324	}
2325
2326	RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2327	{
2328		return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2329	}
2330
2331//	RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2332//	{
2333//		return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2334//	}
2335
2336//	RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2337//	{
2338//		return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2339//	}
2340
2341//	RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2342//	{
2343//		return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2344//	}
2345
2346	RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2347	{
2348		return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2349	}
2350
2351	RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2352	{
2353		return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2354	}
2355
2356	RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2357	{
2358		return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2359	}
2360
2361//	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2362//	{
2363//		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2364//	}
2365
2366//	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2367//	{
2368//		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
2369//	}
2370
2371	RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2372	{
2373		return lhs = lhs + rhs;
2374	}
2375
2376	RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2377	{
2378		return lhs = lhs - rhs;
2379	}
2380
2381//	RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2382//	{
2383//		return lhs = lhs * rhs;
2384//	}
2385
2386//	RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2387//	{
2388//		return lhs = lhs / rhs;
2389//	}
2390
2391//	RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2392//	{
2393//		return lhs = lhs % rhs;
2394//	}
2395
2396	RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2397	{
2398		return lhs = lhs & rhs;
2399	}
2400
2401	RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2402	{
2403		return lhs = lhs | rhs;
2404	}
2405
2406	RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2407	{
2408		return lhs = lhs ^ rhs;
2409	}
2410
2411//	RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2412//	{
2413//		return lhs = lhs << rhs;
2414//	}
2415
2416//	RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2417//	{
2418//		return lhs = lhs >> rhs;
2419//	}
2420
2421//	RValue<SByte8> operator+(RValue<SByte8> val)
2422//	{
2423//		return val;
2424//	}
2425
2426//	RValue<SByte8> operator-(RValue<SByte8> val)
2427//	{
2428//		return RValue<SByte8>(Nucleus::createNeg(val.value));
2429//	}
2430
2431	RValue<SByte8> operator~(RValue<SByte8> val)
2432	{
2433		return RValue<SByte8>(Nucleus::createNot(val.value));
2434	}
2435
2436	RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2437	{
2438		return x86::paddsb(x, y);
2439	}
2440
2441	RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2442	{
2443		return x86::psubsb(x, y);
2444	}
2445
2446	RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2447	{
2448		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2449		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2450	}
2451
2452	RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2453	{
2454		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2455		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2456		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2457	}
2458
2459	RValue<Int> SignMask(RValue<SByte8> x)
2460	{
2461		return x86::pmovmskb(As<Byte8>(x));
2462	}
2463
2464	RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2465	{
2466		return x86::pcmpgtb(x, y);
2467	}
2468
2469	RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2470	{
2471		return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
2472	}
2473
2474	Type *SByte8::getType()
2475	{
2476		return T(Type_v8i8);
2477	}
2478
2479	Byte16::Byte16(RValue<Byte16> rhs)
2480	{
2481		storeValue(rhs.value);
2482	}
2483
2484	Byte16::Byte16(const Byte16 &rhs)
2485	{
2486		Value *value = rhs.loadValue();
2487		storeValue(value);
2488	}
2489
2490	Byte16::Byte16(const Reference<Byte16> &rhs)
2491	{
2492		Value *value = rhs.loadValue();
2493		storeValue(value);
2494	}
2495
2496	RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2497	{
2498		storeValue(rhs.value);
2499
2500		return rhs;
2501	}
2502
2503	RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2504	{
2505		Value *value = rhs.loadValue();
2506		storeValue(value);
2507
2508		return RValue<Byte16>(value);
2509	}
2510
2511	RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2512	{
2513		Value *value = rhs.loadValue();
2514		storeValue(value);
2515
2516		return RValue<Byte16>(value);
2517	}
2518
2519	Type *Byte16::getType()
2520	{
2521		return T(llvm::VectorType::get(T(Byte::getType()), 16));
2522	}
2523
2524	Type *SByte16::getType()
2525	{
2526		return T(llvm::VectorType::get(T(SByte::getType()), 16));
2527	}
2528
2529	Short2::Short2(RValue<Short4> cast)
2530	{
2531		storeValue(Nucleus::createBitCast(cast.value, getType()));
2532	}
2533
2534	Type *Short2::getType()
2535	{
2536		return T(Type_v2i16);
2537	}
2538
2539	UShort2::UShort2(RValue<UShort4> cast)
2540	{
2541		storeValue(Nucleus::createBitCast(cast.value, getType()));
2542	}
2543
2544	Type *UShort2::getType()
2545	{
2546		return T(Type_v2i16);
2547	}
2548
2549	Short4::Short4(RValue<Int> cast)
2550	{
2551		Value *vector = loadValue();
2552		Value *element = Nucleus::createTrunc(cast.value, Short::getType());
2553		Value *insert = Nucleus::createInsertElement(vector, element, 0);
2554		Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
2555
2556		storeValue(swizzle);
2557	}
2558
2559	Short4::Short4(RValue<Int4> cast)
2560	{
2561		int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
2562		Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2563
2564		Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2565		Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
2566
2567		storeValue(short4);
2568	}
2569
2570//	Short4::Short4(RValue<Float> cast)
2571//	{
2572//	}
2573
2574	Short4::Short4(RValue<Float4> cast)
2575	{
2576		Int4 v4i32 = Int4(cast);
2577		v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2578
2579		storeValue(As<Short4>(Int2(v4i32)).value);
2580	}
2581
2582	Short4::Short4(short xyzw)
2583	{
2584		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2585		storeValue(Nucleus::createConstantVector(constantVector, getType()));
2586	}
2587
2588	Short4::Short4(short x, short y, short z, short w)
2589	{
2590		int64_t constantVector[4] = {x, y, z, w};
2591		storeValue(Nucleus::createConstantVector(constantVector, getType()));
2592	}
2593
2594	Short4::Short4(RValue<Short4> rhs)
2595	{
2596		storeValue(rhs.value);
2597	}
2598
2599	Short4::Short4(const Short4 &rhs)
2600	{
2601		Value *value = rhs.loadValue();
2602		storeValue(value);
2603	}
2604
2605	Short4::Short4(const Reference<Short4> &rhs)
2606	{
2607		Value *value = rhs.loadValue();
2608		storeValue(value);
2609	}
2610
2611	Short4::Short4(RValue<UShort4> rhs)
2612	{
2613		storeValue(rhs.value);
2614	}
2615
2616	Short4::Short4(const UShort4 &rhs)
2617	{
2618		storeValue(rhs.loadValue());
2619	}
2620
2621	Short4::Short4(const Reference<UShort4> &rhs)
2622	{
2623		storeValue(rhs.loadValue());
2624	}
2625
2626	RValue<Short4> Short4::operator=(RValue<Short4> rhs)
2627	{
2628		storeValue(rhs.value);
2629
2630		return rhs;
2631	}
2632
2633	RValue<Short4> Short4::operator=(const Short4 &rhs)
2634	{
2635		Value *value = rhs.loadValue();
2636		storeValue(value);
2637
2638		return RValue<Short4>(value);
2639	}
2640
2641	RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
2642	{
2643		Value *value = rhs.loadValue();
2644		storeValue(value);
2645
2646		return RValue<Short4>(value);
2647	}
2648
2649	RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
2650	{
2651		storeValue(rhs.value);
2652
2653		return RValue<Short4>(rhs);
2654	}
2655
2656	RValue<Short4> Short4::operator=(const UShort4 &rhs)
2657	{
2658		Value *value = rhs.loadValue();
2659		storeValue(value);
2660
2661		return RValue<Short4>(value);
2662	}
2663
2664	RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
2665	{
2666		Value *value = rhs.loadValue();
2667		storeValue(value);
2668
2669		return RValue<Short4>(value);
2670	}
2671
2672	RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2673	{
2674		return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
2675	}
2676
2677	RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2678	{
2679		return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
2680	}
2681
2682	RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2683	{
2684		return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2685	}
2686
2687//	RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2688//	{
2689//		return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2690//	}
2691
2692//	RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2693//	{
2694//		return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
2695//	}
2696
2697	RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2698	{
2699		return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
2700	}
2701
2702	RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2703	{
2704		return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
2705	}
2706
2707	RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2708	{
2709		return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
2710	}
2711
2712	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2713	{
2714	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2715
2716		return x86::psllw(lhs, rhs);
2717	}
2718
2719	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2720	{
2721	//	return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2722
2723		return x86::psraw(lhs, rhs);
2724	}
2725
2726	RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
2727	{
2728		return lhs = lhs + rhs;
2729	}
2730
2731	RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
2732	{
2733		return lhs = lhs - rhs;
2734	}
2735
2736	RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
2737	{
2738		return lhs = lhs * rhs;
2739	}
2740
2741//	RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
2742//	{
2743//		return lhs = lhs / rhs;
2744//	}
2745
2746//	RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
2747//	{
2748//		return lhs = lhs % rhs;
2749//	}
2750
2751	RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
2752	{
2753		return lhs = lhs & rhs;
2754	}
2755
2756	RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
2757	{
2758		return lhs = lhs | rhs;
2759	}
2760
2761	RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
2762	{
2763		return lhs = lhs ^ rhs;
2764	}
2765
2766	RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
2767	{
2768		return lhs = lhs << rhs;
2769	}
2770
2771	RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
2772	{
2773		return lhs = lhs >> rhs;
2774	}
2775
2776//	RValue<Short4> operator+(RValue<Short4> val)
2777//	{
2778//		return val;
2779//	}
2780
2781	RValue<Short4> operator-(RValue<Short4> val)
2782	{
2783		return RValue<Short4>(Nucleus::createNeg(val.value));
2784	}
2785
2786	RValue<Short4> operator~(RValue<Short4> val)
2787	{
2788		return RValue<Short4>(Nucleus::createNot(val.value));
2789	}
2790
2791	RValue<Short4> RoundShort4(RValue<Float4> cast)
2792	{
2793		RValue<Int4> int4 = RoundInt(cast);
2794		return As<Short4>(PackSigned(int4, int4));
2795	}
2796
2797	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2798	{
2799		return x86::pmaxsw(x, y);
2800	}
2801
2802	RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2803	{
2804		return x86::pminsw(x, y);
2805	}
2806
2807	RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2808	{
2809		return x86::paddsw(x, y);
2810	}
2811
2812	RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2813	{
2814		return x86::psubsw(x, y);
2815	}
2816
2817	RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2818	{
2819		return x86::pmulhw(x, y);
2820	}
2821
2822	RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2823	{
2824		return x86::pmaddwd(x, y);
2825	}
2826
2827	RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
2828	{
2829		auto result = x86::packsswb(x, y);
2830
2831		return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
2832	}
2833
2834	RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
2835	{
2836		auto result = x86::packuswb(x, y);
2837
2838		return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
2839	}
2840
2841	RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
2842	{
2843		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
2844		return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2845	}
2846
2847	RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
2848	{
2849		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
2850		auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2851		return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
2852	}
2853
2854	RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
2855	{
2856		// Real type is v8i16
2857		int shuffle[8] =
2858		{
2859			(select >> 0) & 0x03,
2860			(select >> 2) & 0x03,
2861			(select >> 4) & 0x03,
2862			(select >> 6) & 0x03,
2863			(select >> 0) & 0x03,
2864			(select >> 2) & 0x03,
2865			(select >> 4) & 0x03,
2866			(select >> 6) & 0x03,
2867		};
2868
2869		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2870	}
2871
2872	RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
2873	{
2874		return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
2875	}
2876
2877	RValue<Short> Extract(RValue<Short4> val, int i)
2878	{
2879		return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2880	}
2881
2882	RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2883	{
2884		return x86::pcmpgtw(x, y);
2885	}
2886
2887	RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2888	{
2889		return x86::pcmpeqw(x, y);
2890	}
2891
2892	Type *Short4::getType()
2893	{
2894		return T(Type_v4i16);
2895	}
2896
2897	UShort4::UShort4(RValue<Int4> cast)
2898	{
2899		*this = Short4(cast);
2900	}
2901
2902	UShort4::UShort4(RValue<Float4> cast, bool saturate)
2903	{
2904		if(saturate)
2905		{
2906			if(CPUID::supportsSSE4_1())
2907			{
2908				Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
2909				*this = As<Short4>(PackUnsigned(int4, int4));
2910			}
2911			else
2912			{
2913				*this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
2914			}
2915		}
2916		else
2917		{
2918			*this = Short4(Int4(cast));
2919		}
2920	}
2921
2922	UShort4::UShort4(unsigned short xyzw)
2923	{
2924		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2925		storeValue(Nucleus::createConstantVector(constantVector, getType()));
2926	}
2927
2928	UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
2929	{
2930		int64_t constantVector[4] = {x, y, z, w};
2931		storeValue(Nucleus::createConstantVector(constantVector, getType()));
2932	}
2933
2934	UShort4::UShort4(RValue<UShort4> rhs)
2935	{
2936		storeValue(rhs.value);
2937	}
2938
2939	UShort4::UShort4(const UShort4 &rhs)
2940	{
2941		Value *value = rhs.loadValue();
2942		storeValue(value);
2943	}
2944
2945	UShort4::UShort4(const Reference<UShort4> &rhs)
2946	{
2947		Value *value = rhs.loadValue();
2948		storeValue(value);
2949	}
2950
2951	UShort4::UShort4(RValue<Short4> rhs)
2952	{
2953		storeValue(rhs.value);
2954	}
2955
2956	UShort4::UShort4(const Short4 &rhs)
2957	{
2958		Value *value = rhs.loadValue();
2959		storeValue(value);
2960	}
2961
2962	UShort4::UShort4(const Reference<Short4> &rhs)
2963	{
2964		Value *value = rhs.loadValue();
2965		storeValue(value);
2966	}
2967
2968	RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
2969	{
2970		storeValue(rhs.value);
2971
2972		return rhs;
2973	}
2974
2975	RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
2976	{
2977		Value *value = rhs.loadValue();
2978		storeValue(value);
2979
2980		return RValue<UShort4>(value);
2981	}
2982
2983	RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
2984	{
2985		Value *value = rhs.loadValue();
2986		storeValue(value);
2987
2988		return RValue<UShort4>(value);
2989	}
2990
2991	RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
2992	{
2993		storeValue(rhs.value);
2994
2995		return RValue<UShort4>(rhs);
2996	}
2997
2998	RValue<UShort4> UShort4::operator=(const Short4 &rhs)
2999	{
3000		Value *value = rhs.loadValue();
3001		storeValue(value);
3002
3003		return RValue<UShort4>(value);
3004	}
3005
3006	RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3007	{
3008		Value *value = rhs.loadValue();
3009		storeValue(value);
3010
3011		return RValue<UShort4>(value);
3012	}
3013
3014	RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3015	{
3016		return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3017	}
3018
3019	RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3020	{
3021		return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3022	}
3023
3024	RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3025	{
3026		return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3027	}
3028
3029	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3030	{
3031		return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3032	}
3033
3034	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3035	{
3036		return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3037	}
3038
3039	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3040	{
3041		return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3042	}
3043
3044	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3045	{
3046	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3047
3048		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3049	}
3050
3051	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3052	{
3053	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3054
3055		return x86::psrlw(lhs, rhs);
3056	}
3057
3058	RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3059	{
3060		return lhs = lhs << rhs;
3061	}
3062
3063	RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3064	{
3065		return lhs = lhs >> rhs;
3066	}
3067
3068	RValue<UShort4> operator~(RValue<UShort4> val)
3069	{
3070		return RValue<UShort4>(Nucleus::createNot(val.value));
3071	}
3072
3073	RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3074	{
3075		return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3076	}
3077
3078	RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3079	{
3080		return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3081	}
3082
3083	RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3084	{
3085		return x86::paddusw(x, y);
3086	}
3087
3088	RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3089	{
3090		return x86::psubusw(x, y);
3091	}
3092
3093	RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3094	{
3095		return x86::pmulhuw(x, y);
3096	}
3097
3098	RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3099	{
3100		return x86::pavgw(x, y);
3101	}
3102
3103	Type *UShort4::getType()
3104	{
3105		return T(Type_v4i16);
3106	}
3107
3108	Short8::Short8(short c)
3109	{
3110		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3111		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3112	}
3113
3114	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3115	{
3116		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3117		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3118	}
3119
3120	Short8::Short8(RValue<Short8> rhs)
3121	{
3122		storeValue(rhs.value);
3123	}
3124
3125	Short8::Short8(const Reference<Short8> &rhs)
3126	{
3127		Value *value = rhs.loadValue();
3128		storeValue(value);
3129	}
3130
3131	Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3132	{
3133		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3134		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3135
3136		storeValue(packed);
3137	}
3138
3139	RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3140	{
3141		return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3142	}
3143
3144	RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3145	{
3146		return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3147	}
3148
3149	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3150	{
3151		return x86::psllw(lhs, rhs);   // FIXME: Fallback required
3152	}
3153
3154	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3155	{
3156		return x86::psraw(lhs, rhs);   // FIXME: Fallback required
3157	}
3158
3159	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3160	{
3161		return x86::pmaddwd(x, y);   // FIXME: Fallback required
3162	}
3163
3164	RValue<Int4> Abs(RValue<Int4> x)
3165	{
3166		auto negative = x >> 31;
3167		return (x ^ negative) - negative;
3168	}
3169
3170	RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3171	{
3172		return x86::pmulhw(x, y);   // FIXME: Fallback required
3173	}
3174
3175	Type *Short8::getType()
3176	{
3177		return T(llvm::VectorType::get(T(Short::getType()), 8));
3178	}
3179
3180	UShort8::UShort8(unsigned short c)
3181	{
3182		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3183		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3184	}
3185
3186	UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3187	{
3188		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3189		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3190	}
3191
3192	UShort8::UShort8(RValue<UShort8> rhs)
3193	{
3194		storeValue(rhs.value);
3195	}
3196
3197	UShort8::UShort8(const Reference<UShort8> &rhs)
3198	{
3199		Value *value = rhs.loadValue();
3200		storeValue(value);
3201	}
3202
3203	UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3204	{
3205		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3206		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3207
3208		storeValue(packed);
3209	}
3210
3211	RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3212	{
3213		storeValue(rhs.value);
3214
3215		return rhs;
3216	}
3217
3218	RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3219	{
3220		Value *value = rhs.loadValue();
3221		storeValue(value);
3222
3223		return RValue<UShort8>(value);
3224	}
3225
3226	RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3227	{
3228		Value *value = rhs.loadValue();
3229		storeValue(value);
3230
3231		return RValue<UShort8>(value);
3232	}
3233
3234	RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3235	{
3236		return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3237	}
3238
3239	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3240	{
3241		return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));   // FIXME: Fallback required
3242	}
3243
3244	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3245	{
3246		return x86::psrlw(lhs, rhs);   // FIXME: Fallback required
3247	}
3248
3249	RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3250	{
3251		return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3252	}
3253
3254	RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3255	{
3256		return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3257	}
3258
3259	RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3260	{
3261		return lhs = lhs + rhs;
3262	}
3263
3264	RValue<UShort8> operator~(RValue<UShort8> val)
3265	{
3266		return RValue<UShort8>(Nucleus::createNot(val.value));
3267	}
3268
3269	RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3270	{
3271		int pshufb[16] =
3272		{
3273			select0 + 0,
3274			select0 + 1,
3275			select1 + 0,
3276			select1 + 1,
3277			select2 + 0,
3278			select2 + 1,
3279			select3 + 0,
3280			select3 + 1,
3281			select4 + 0,
3282			select4 + 1,
3283			select5 + 0,
3284			select5 + 1,
3285			select6 + 0,
3286			select6 + 1,
3287			select7 + 0,
3288			select7 + 1,
3289		};
3290
3291		Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
3292		Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
3293		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3294
3295		return RValue<UShort8>(short8);
3296	}
3297
3298	RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3299	{
3300		return x86::pmulhuw(x, y);   // FIXME: Fallback required
3301	}
3302
3303	Type *UShort8::getType()
3304	{
3305		return T(llvm::VectorType::get(T(UShort::getType()), 8));
3306	}
3307
3308	Int::Int(Argument<Int> argument)
3309	{
3310		storeValue(argument.value);
3311	}
3312
3313	Int::Int(RValue<Byte> cast)
3314	{
3315		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3316
3317		storeValue(integer);
3318	}
3319
3320	Int::Int(RValue<SByte> cast)
3321	{
3322		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3323
3324		storeValue(integer);
3325	}
3326
3327	Int::Int(RValue<Short> cast)
3328	{
3329		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3330
3331		storeValue(integer);
3332	}
3333
3334	Int::Int(RValue<UShort> cast)
3335	{
3336		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3337
3338		storeValue(integer);
3339	}
3340
3341	Int::Int(RValue<Int2> cast)
3342	{
3343		*this = Extract(cast, 0);
3344	}
3345
3346	Int::Int(RValue<Long> cast)
3347	{
3348		Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3349
3350		storeValue(integer);
3351	}
3352
3353	Int::Int(RValue<Float> cast)
3354	{
3355		Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3356
3357		storeValue(integer);
3358	}
3359
3360	Int::Int(int x)
3361	{
3362		storeValue(Nucleus::createConstantInt(x));
3363	}
3364
3365	Int::Int(RValue<Int> rhs)
3366	{
3367		storeValue(rhs.value);
3368	}
3369
3370	Int::Int(RValue<UInt> rhs)
3371	{
3372		storeValue(rhs.value);
3373	}
3374
3375	Int::Int(const Int &rhs)
3376	{
3377		Value *value = rhs.loadValue();
3378		storeValue(value);
3379	}
3380
3381	Int::Int(const Reference<Int> &rhs)
3382	{
3383		Value *value = rhs.loadValue();
3384		storeValue(value);
3385	}
3386
3387	Int::Int(const UInt &rhs)
3388	{
3389		Value *value = rhs.loadValue();
3390		storeValue(value);
3391	}
3392
3393	Int::Int(const Reference<UInt> &rhs)
3394	{
3395		Value *value = rhs.loadValue();
3396		storeValue(value);
3397	}
3398
3399	RValue<Int> Int::operator=(int rhs)
3400	{
3401		return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3402	}
3403
3404	RValue<Int> Int::operator=(RValue<Int> rhs)
3405	{
3406		storeValue(rhs.value);
3407
3408		return rhs;
3409	}
3410
3411	RValue<Int> Int::operator=(RValue<UInt> rhs)
3412	{
3413		storeValue(rhs.value);
3414
3415		return RValue<Int>(rhs);
3416	}
3417
3418	RValue<Int> Int::operator=(const Int &rhs)
3419	{
3420		Value *value = rhs.loadValue();
3421		storeValue(value);
3422
3423		return RValue<Int>(value);
3424	}
3425
3426	RValue<Int> Int::operator=(const Reference<Int> &rhs)
3427	{
3428		Value *value = rhs.loadValue();
3429		storeValue(value);
3430
3431		return RValue<Int>(value);
3432	}
3433
3434	RValue<Int> Int::operator=(const UInt &rhs)
3435	{
3436		Value *value = rhs.loadValue();
3437		storeValue(value);
3438
3439		return RValue<Int>(value);
3440	}
3441
3442	RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3443	{
3444		Value *value = rhs.loadValue();
3445		storeValue(value);
3446
3447		return RValue<Int>(value);
3448	}
3449
3450	RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3451	{
3452		return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3453	}
3454
3455	RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3456	{
3457		return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3458	}
3459
3460	RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3461	{
3462		return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3463	}
3464
3465	RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3466	{
3467		return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3468	}
3469
3470	RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3471	{
3472		return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3473	}
3474
3475	RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3476	{
3477		return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3478	}
3479
3480	RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3481	{
3482		return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3483	}
3484
3485	RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3486	{
3487		return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3488	}
3489
3490	RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3491	{
3492		return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3493	}
3494
3495	RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3496	{
3497		return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3498	}
3499
3500	RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
3501	{
3502		return lhs = lhs + rhs;
3503	}
3504
3505	RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
3506	{
3507		return lhs = lhs - rhs;
3508	}
3509
3510	RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
3511	{
3512		return lhs = lhs * rhs;
3513	}
3514
3515	RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
3516	{
3517		return lhs = lhs / rhs;
3518	}
3519
3520	RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
3521	{
3522		return lhs = lhs % rhs;
3523	}
3524
3525	RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
3526	{
3527		return lhs = lhs & rhs;
3528	}
3529
3530	RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
3531	{
3532		return lhs = lhs | rhs;
3533	}
3534
3535	RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
3536	{
3537		return lhs = lhs ^ rhs;
3538	}
3539
3540	RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
3541	{
3542		return lhs = lhs << rhs;
3543	}
3544
3545	RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
3546	{
3547		return lhs = lhs >> rhs;
3548	}
3549
3550	RValue<Int> operator+(RValue<Int> val)
3551	{
3552		return val;
3553	}
3554
3555	RValue<Int> operator-(RValue<Int> val)
3556	{
3557		return RValue<Int>(Nucleus::createNeg(val.value));
3558	}
3559
3560	RValue<Int> operator~(RValue<Int> val)
3561	{
3562		return RValue<Int>(Nucleus::createNot(val.value));
3563	}
3564
3565	RValue<Int> operator++(Int &val, int)   // Post-increment
3566	{
3567		RValue<Int> res = val;
3568
3569		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
3570		val.storeValue(inc);
3571
3572		return res;
3573	}
3574
3575	const Int &operator++(Int &val)   // Pre-increment
3576	{
3577		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
3578		val.storeValue(inc);
3579
3580		return val;
3581	}
3582
3583	RValue<Int> operator--(Int &val, int)   // Post-decrement
3584	{
3585		RValue<Int> res = val;
3586
3587		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
3588		val.storeValue(inc);
3589
3590		return res;
3591	}
3592
3593	const Int &operator--(Int &val)   // Pre-decrement
3594	{
3595		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
3596		val.storeValue(inc);
3597
3598		return val;
3599	}
3600
3601	RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
3602	{
3603		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
3604	}
3605
3606	RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
3607	{
3608		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
3609	}
3610
3611	RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
3612	{
3613		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
3614	}
3615
3616	RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
3617	{
3618		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
3619	}
3620
3621	RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
3622	{
3623		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
3624	}
3625
3626	RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
3627	{
3628		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
3629	}
3630
3631	RValue<Int> Max(RValue<Int> x, RValue<Int> y)
3632	{
3633		return IfThenElse(x > y, x, y);
3634	}
3635
3636	RValue<Int> Min(RValue<Int> x, RValue<Int> y)
3637	{
3638		return IfThenElse(x < y, x, y);
3639	}
3640
3641	RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
3642	{
3643		return Min(Max(x, min), max);
3644	}
3645
3646	RValue<Int> RoundInt(RValue<Float> cast)
3647	{
3648		return x86::cvtss2si(cast);
3649
3650	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
3651	}
3652
3653	Type *Int::getType()
3654	{
3655		return T(llvm::Type::getInt32Ty(*::context));
3656	}
3657
3658	Long::Long(RValue<Int> cast)
3659	{
3660		Value *integer = Nucleus::createSExt(cast.value, Long::getType());
3661
3662		storeValue(integer);
3663	}
3664
3665	Long::Long(RValue<UInt> cast)
3666	{
3667		Value *integer = Nucleus::createZExt(cast.value, Long::getType());
3668
3669		storeValue(integer);
3670	}
3671
3672	Long::Long(RValue<Long> rhs)
3673	{
3674		storeValue(rhs.value);
3675	}
3676
3677	RValue<Long> Long::operator=(int64_t rhs)
3678	{
3679		return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
3680	}
3681
	// Stores rhs into this variable and returns rhs itself.
	RValue<Long> Long::operator=(RValue<Long> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
3688
3689	RValue<Long> Long::operator=(const Long &rhs)
3690	{
3691		Value *value = rhs.loadValue();
3692		storeValue(value);
3693
3694		return RValue<Long>(value);
3695	}
3696
3697	RValue<Long> Long::operator=(const Reference<Long> &rhs)
3698	{
3699		Value *value = rhs.loadValue();
3700		storeValue(value);
3701
3702		return RValue<Long>(value);
3703	}
3704
3705	RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
3706	{
3707		return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
3708	}
3709
3710	RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
3711	{
3712		return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
3713	}
3714
3715	RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
3716	{
3717		return lhs = lhs + rhs;
3718	}
3719
3720	RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
3721	{
3722		return lhs = lhs - rhs;
3723	}
3724
3725	RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
3726	{
3727		return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
3728	}
3729
3730	Type *Long::getType()
3731	{
3732		return T(llvm::Type::getInt64Ty(*::context));
3733	}
3734
3735	UInt::UInt(Argument<UInt> argument)
3736	{
3737		storeValue(argument.value);
3738	}
3739
3740	UInt::UInt(RValue<UShort> cast)
3741	{
3742		Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
3743
3744		storeValue(integer);
3745	}
3746
3747	UInt::UInt(RValue<Long> cast)
3748	{
3749		Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
3750
3751		storeValue(integer);
3752	}
3753
	// Float -> UInt conversion built only from signed Float -> Int conversions.
	// The whole computation is a single expression; both operands of '&' create instructions.
	UInt::UInt(RValue<Float> cast)
	{
		// Note: createFPToUI is broken, must perform conversion using createFPtoSI
		// Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());

		// Smallest positive value representable in UInt, but not in Int
		const unsigned int ustart = 0x80000000u;
		const float ustartf = float(ustart);

		// If the value is negative, store 0, otherwise store the result of the conversion
		// (the float's bit pattern shifted right by 31 and complemented is used as the mask)
		storeValue((~(As<Int>(cast) >> 31) &
		// Check if the value can be represented as an Int
			IfThenElse(cast >= ustartf,
		// If the value is too large, subtract ustart and re-add it after conversion.
				As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
		// Otherwise, just convert normally
				Int(cast))).value);
	}
3772
3773	UInt::UInt(int x)
3774	{
3775		storeValue(Nucleus::createConstantInt(x));
3776	}
3777
3778	UInt::UInt(unsigned int x)
3779	{
3780		storeValue(Nucleus::createConstantInt(x));
3781	}
3782
3783	UInt::UInt(RValue<UInt> rhs)
3784	{
3785		storeValue(rhs.value);
3786	}
3787
3788	UInt::UInt(RValue<Int> rhs)
3789	{
3790		storeValue(rhs.value);
3791	}
3792
3793	UInt::UInt(const UInt &rhs)
3794	{
3795		Value *value = rhs.loadValue();
3796		storeValue(value);
3797	}
3798
3799	UInt::UInt(const Reference<UInt> &rhs)
3800	{
3801		Value *value = rhs.loadValue();
3802		storeValue(value);
3803	}
3804
3805	UInt::UInt(const Int &rhs)
3806	{
3807		Value *value = rhs.loadValue();
3808		storeValue(value);
3809	}
3810
3811	UInt::UInt(const Reference<Int> &rhs)
3812	{
3813		Value *value = rhs.loadValue();
3814		storeValue(value);
3815	}
3816
3817	RValue<UInt> UInt::operator=(unsigned int rhs)
3818	{
3819		return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
3820	}
3821
	// Stores rhs into this variable and returns rhs itself.
	RValue<UInt> UInt::operator=(RValue<UInt> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
3828
	// Stores the Int r-value's underlying value into this variable,
	// then returns rhs converted to RValue<UInt>.
	RValue<UInt> UInt::operator=(RValue<Int> rhs)
	{
		storeValue(rhs.value);

		return RValue<UInt>(rhs);
	}
3835
3836	RValue<UInt> UInt::operator=(const UInt &rhs)
3837	{
3838		Value *value = rhs.loadValue();
3839		storeValue(value);
3840
3841		return RValue<UInt>(value);
3842	}
3843
3844	RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
3845	{
3846		Value *value = rhs.loadValue();
3847		storeValue(value);
3848
3849		return RValue<UInt>(value);
3850	}
3851
3852	RValue<UInt> UInt::operator=(const Int &rhs)
3853	{
3854		Value *value = rhs.loadValue();
3855		storeValue(value);
3856
3857		return RValue<UInt>(value);
3858	}
3859
3860	RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
3861	{
3862		Value *value = rhs.loadValue();
3863		storeValue(value);
3864
3865		return RValue<UInt>(value);
3866	}
3867
3868	RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
3869	{
3870		return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
3871	}
3872
3873	RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
3874	{
3875		return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
3876	}
3877
3878	RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
3879	{
3880		return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
3881	}
3882
3883	RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
3884	{
3885		return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
3886	}
3887
3888	RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
3889	{
3890		return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
3891	}
3892
3893	RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
3894	{
3895		return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
3896	}
3897
3898	RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
3899	{
3900		return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
3901	}
3902
3903	RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
3904	{
3905		return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
3906	}
3907
3908	RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
3909	{
3910		return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
3911	}
3912
3913	RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
3914	{
3915		return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
3916	}
3917
3918	RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
3919	{
3920		return lhs = lhs + rhs;
3921	}
3922
3923	RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
3924	{
3925		return lhs = lhs - rhs;
3926	}
3927
3928	RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
3929	{
3930		return lhs = lhs * rhs;
3931	}
3932
3933	RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
3934	{
3935		return lhs = lhs / rhs;
3936	}
3937
3938	RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
3939	{
3940		return lhs = lhs % rhs;
3941	}
3942
3943	RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
3944	{
3945		return lhs = lhs & rhs;
3946	}
3947
3948	RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
3949	{
3950		return lhs = lhs | rhs;
3951	}
3952
3953	RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
3954	{
3955		return lhs = lhs ^ rhs;
3956	}
3957
3958	RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
3959	{
3960		return lhs = lhs << rhs;
3961	}
3962
3963	RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
3964	{
3965		return lhs = lhs >> rhs;
3966	}
3967
	// Unary plus: returns val unchanged; no instruction is created here.
	RValue<UInt> operator+(RValue<UInt> val)
	{
		return val;
	}
3972
3973	RValue<UInt> operator-(RValue<UInt> val)
3974	{
3975		return RValue<UInt>(Nucleus::createNeg(val.value));
3976	}
3977
3978	RValue<UInt> operator~(RValue<UInt> val)
3979	{
3980		return RValue<UInt>(Nucleus::createNot(val.value));
3981	}
3982
3983	RValue<UInt> operator++(UInt &val, int)   // Post-increment
3984	{
3985		RValue<UInt> res = val;
3986
3987		Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
3988		val.storeValue(inc);
3989
3990		return res;
3991	}
3992
3993	const UInt &operator++(UInt &val)   // Pre-increment
3994	{
3995		Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
3996		val.storeValue(inc);
3997
3998		return val;
3999	}
4000
4001	RValue<UInt> operator--(UInt &val, int)   // Post-decrement
4002	{
4003		RValue<UInt> res = val;
4004
4005		Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
4006		val.storeValue(inc);
4007
4008		return res;
4009	}
4010
4011	const UInt &operator--(UInt &val)   // Pre-decrement
4012	{
4013		Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
4014		val.storeValue(inc);
4015
4016		return val;
4017	}
4018
4019	RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4020	{
4021		return IfThenElse(x > y, x, y);
4022	}
4023
4024	RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4025	{
4026		return IfThenElse(x < y, x, y);
4027	}
4028
4029	RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4030	{
4031		return Min(Max(x, min), max);
4032	}
4033
4034	RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4035	{
4036		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4037	}
4038
4039	RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4040	{
4041		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4042	}
4043
4044	RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4045	{
4046		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4047	}
4048
4049	RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4050	{
4051		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4052	}
4053
4054	RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4055	{
4056		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4057	}
4058
4059	RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4060	{
4061		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4062	}
4063
4064//	RValue<UInt> RoundUInt(RValue<Float> cast)
4065//	{
4066//		return x86::cvtss2si(val);   // FIXME: Unsigned
4067//
4068//	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4069//	}
4070
4071	Type *UInt::getType()
4072	{
4073		return T(llvm::Type::getInt32Ty(*::context));
4074	}
4075
4076//	Int2::Int2(RValue<Int> cast)
4077//	{
4078//		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4079//		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4080//
4081//		int shuffle[2] = {0, 0};
4082//		Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
4083//
4084//		storeValue(replicate);
4085//	}
4086
4087	Int2::Int2(RValue<Int4> cast)
4088	{
4089		storeValue(Nucleus::createBitCast(cast.value, getType()));
4090	}
4091
4092	Int2::Int2(int x, int y)
4093	{
4094		int64_t constantVector[2] = {x, y};
4095		storeValue(Nucleus::createConstantVector(constantVector, getType()));
4096	}
4097
4098	Int2::Int2(RValue<Int2> rhs)
4099	{
4100		storeValue(rhs.value);
4101	}
4102
4103	Int2::Int2(const Int2 &rhs)
4104	{
4105		Value *value = rhs.loadValue();
4106		storeValue(value);
4107	}
4108
4109	Int2::Int2(const Reference<Int2> &rhs)
4110	{
4111		Value *value = rhs.loadValue();
4112		storeValue(value);
4113	}
4114
	// Builds an Int2 from two scalars: each is first expanded to a temporary Int4,
	// the two vectors are interleaved with a shuffle, and the result is bit-cast to Int2.
	Int2::Int2(RValue<Int> lo, RValue<Int> hi)
	{
		// Interleave pattern: indices 0-3 pick from the first vector, 4-7 from the second.
		int shuffle[4] = {0, 4, 1, 5};
		// NOTE(review): both Int4 temporaries are built inside one argument list, so C++ leaves
		// their construction order unspecified.
		Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);

		storeValue(Nucleus::createBitCast(packed, Int2::getType()));
	}
4122
	// Stores rhs into this variable and returns rhs itself.
	RValue<Int2> Int2::operator=(RValue<Int2> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
4129
4130	RValue<Int2> Int2::operator=(const Int2 &rhs)
4131	{
4132		Value *value = rhs.loadValue();
4133		storeValue(value);
4134
4135		return RValue<Int2>(value);
4136	}
4137
4138	RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4139	{
4140		Value *value = rhs.loadValue();
4141		storeValue(value);
4142
4143		return RValue<Int2>(value);
4144	}
4145
4146	RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4147	{
4148		return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4149	}
4150
4151	RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4152	{
4153		return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4154	}
4155
4156//	RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4157//	{
4158//		return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4159//	}
4160
4161//	RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4162//	{
4163//		return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4164//	}
4165
4166//	RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4167//	{
4168//		return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4169//	}
4170
4171	RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4172	{
4173		return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4174	}
4175
4176	RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4177	{
4178		return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4179	}
4180
4181	RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4182	{
4183		return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4184	}
4185
4186	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4187	{
4188	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4189
4190		return x86::pslld(lhs, rhs);
4191	}
4192
4193	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4194	{
4195	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4196
4197		return x86::psrad(lhs, rhs);
4198	}
4199
4200	RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4201	{
4202		return lhs = lhs + rhs;
4203	}
4204
4205	RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4206	{
4207		return lhs = lhs - rhs;
4208	}
4209
4210//	RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4211//	{
4212//		return lhs = lhs * rhs;
4213//	}
4214
4215//	RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4216//	{
4217//		return lhs = lhs / rhs;
4218//	}
4219
4220//	RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4221//	{
4222//		return lhs = lhs % rhs;
4223//	}
4224
4225	RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4226	{
4227		return lhs = lhs & rhs;
4228	}
4229
4230	RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4231	{
4232		return lhs = lhs | rhs;
4233	}
4234
4235	RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4236	{
4237		return lhs = lhs ^ rhs;
4238	}
4239
4240	RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4241	{
4242		return lhs = lhs << rhs;
4243	}
4244
4245	RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4246	{
4247		return lhs = lhs >> rhs;
4248	}
4249
4250//	RValue<Int2> operator+(RValue<Int2> val)
4251//	{
4252//		return val;
4253//	}
4254
4255//	RValue<Int2> operator-(RValue<Int2> val)
4256//	{
4257//		return RValue<Int2>(Nucleus::createNeg(val.value));
4258//	}
4259
4260	RValue<Int2> operator~(RValue<Int2> val)
4261	{
4262		return RValue<Int2>(Nucleus::createNot(val.value));
4263	}
4264
4265	RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4266	{
4267		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4268		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4269	}
4270
4271	RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4272	{
4273		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4274		auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4275		return As<Short4>(Swizzle(lowHigh, 0xEE));
4276	}
4277
4278	RValue<Int> Extract(RValue<Int2> val, int i)
4279	{
4280		return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4281	}
4282
4283	RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4284	{
4285		return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
4286	}
4287
	// Reactor type handle for Int2: the Type_v2i32 internal type.
	Type *Int2::getType()
	{
		return T(Type_v2i32);
	}
4292
4293	UInt2::UInt2(unsigned int x, unsigned int y)
4294	{
4295		int64_t constantVector[2] = {x, y};
4296		storeValue(Nucleus::createConstantVector(constantVector, getType()));
4297	}
4298
4299	UInt2::UInt2(RValue<UInt2> rhs)
4300	{
4301		storeValue(rhs.value);
4302	}
4303
4304	UInt2::UInt2(const UInt2 &rhs)
4305	{
4306		Value *value = rhs.loadValue();
4307		storeValue(value);
4308	}
4309
4310	UInt2::UInt2(const Reference<UInt2> &rhs)
4311	{
4312		Value *value = rhs.loadValue();
4313		storeValue(value);
4314	}
4315
	// Stores rhs into this variable and returns rhs itself.
	RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
4322
4323	RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4324	{
4325		Value *value = rhs.loadValue();
4326		storeValue(value);
4327
4328		return RValue<UInt2>(value);
4329	}
4330
4331	RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4332	{
4333		Value *value = rhs.loadValue();
4334		storeValue(value);
4335
4336		return RValue<UInt2>(value);
4337	}
4338
4339	RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4340	{
4341		return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
4342	}
4343
4344	RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4345	{
4346		return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4347	}
4348
4349//	RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4350//	{
4351//		return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4352//	}
4353
4354//	RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4355//	{
4356//		return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4357//	}
4358
4359//	RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4360//	{
4361//		return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
4362//	}
4363
4364	RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4365	{
4366		return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
4367	}
4368
4369	RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4370	{
4371		return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
4372	}
4373
4374	RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4375	{
4376		return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
4377	}
4378
4379	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4380	{
4381	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
4382
4383		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
4384	}
4385
4386	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4387	{
4388	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
4389
4390		return x86::psrld(lhs, rhs);
4391	}
4392
4393	RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4394	{
4395		return lhs = lhs + rhs;
4396	}
4397
4398	RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4399	{
4400		return lhs = lhs - rhs;
4401	}
4402
4403//	RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4404//	{
4405//		return lhs = lhs * rhs;
4406//	}
4407
4408//	RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4409//	{
4410//		return lhs = lhs / rhs;
4411//	}
4412
4413//	RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4414//	{
4415//		return lhs = lhs % rhs;
4416//	}
4417
4418	RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4419	{
4420		return lhs = lhs & rhs;
4421	}
4422
4423	RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4424	{
4425		return lhs = lhs | rhs;
4426	}
4427
4428	RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4429	{
4430		return lhs = lhs ^ rhs;
4431	}
4432
4433	RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4434	{
4435		return lhs = lhs << rhs;
4436	}
4437
4438	RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4439	{
4440		return lhs = lhs >> rhs;
4441	}
4442
4443//	RValue<UInt2> operator+(RValue<UInt2> val)
4444//	{
4445//		return val;
4446//	}
4447
4448//	RValue<UInt2> operator-(RValue<UInt2> val)
4449//	{
4450//		return RValue<UInt2>(Nucleus::createNeg(val.value));
4451//	}
4452
4453	RValue<UInt2> operator~(RValue<UInt2> val)
4454	{
4455		return RValue<UInt2>(Nucleus::createNot(val.value));
4456	}
4457
	// Reactor type handle for UInt2: the same Type_v2i32 internal type that Int2 uses.
	Type *UInt2::getType()
	{
		return T(Type_v2i32);
	}
4462
	// Default constructor: only initializes the XYZW member with this; no value is stored.
	Int4::Int4() : XYZW(this)
	{
	}
4466
4467	Int4::Int4(RValue<Byte4> cast) : XYZW(this)
4468	{
4469		if(CPUID::supportsSSE4_1())
4470		{
4471			*this = x86::pmovzxbd(As<Byte16>(cast));
4472		}
4473		else
4474		{
4475			int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4476			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
4477			Value *b = Nucleus::createShuffleVector(a, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4478
4479			int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4480			Value *c = Nucleus::createBitCast(b, Short8::getType());
4481			Value *d = Nucleus::createShuffleVector(c, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4482
4483			*this = As<Int4>(d);
4484		}
4485	}
4486
4487	Int4::Int4(RValue<SByte4> cast) : XYZW(this)
4488	{
4489		if(CPUID::supportsSSE4_1())
4490		{
4491			*this = x86::pmovsxbd(As<SByte16>(cast));
4492		}
4493		else
4494		{
4495			int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4496			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
4497			Value *b = Nucleus::createShuffleVector(a, a, swizzle);
4498
4499			int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4500			Value *c = Nucleus::createBitCast(b, Short8::getType());
4501			Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
4502
4503			*this = As<Int4>(d) >> 24;
4504		}
4505	}
4506
4507	Int4::Int4(RValue<Float4> cast) : XYZW(this)
4508	{
4509		Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
4510
4511		storeValue(xyzw);
4512	}
4513
4514	Int4::Int4(RValue<Short4> cast) : XYZW(this)
4515	{
4516		if(CPUID::supportsSSE4_1())
4517		{
4518			*this = x86::pmovsxwd(As<Short8>(cast));
4519		}
4520		else
4521		{
4522			int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4523			Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
4524			*this = As<Int4>(c) >> 16;
4525		}
4526	}
4527
4528	Int4::Int4(RValue<UShort4> cast) : XYZW(this)
4529	{
4530		if(CPUID::supportsSSE4_1())
4531		{
4532			*this = x86::pmovzxwd(As<UShort8>(cast));
4533		}
4534		else
4535		{
4536			int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4537			Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
4538			*this = As<Int4>(c);
4539		}
4540	}
4541
	// Constant vector with all four components equal to xyzw.
	Int4::Int4(int xyzw) : XYZW(this)
	{
		constant(xyzw, xyzw, xyzw, xyzw);
	}
4546
	// Constant vector {x, yzw, yzw, yzw}.
	Int4::Int4(int x, int yzw) : XYZW(this)
	{
		constant(x, yzw, yzw, yzw);
	}
4551
	// Constant vector {x, y, zw, zw}.
	Int4::Int4(int x, int y, int zw) : XYZW(this)
	{
		constant(x, y, zw, zw);
	}
4556
	// Constant vector {x, y, z, w}.
	Int4::Int4(int x, int y, int z, int w) : XYZW(this)
	{
		constant(x, y, z, w);
	}
4561
4562	void Int4::constant(int x, int y, int z, int w)
4563	{
4564		int64_t constantVector[4] = {x, y, z, w};
4565		storeValue(Nucleus::createConstantVector(constantVector, getType()));
4566	}
4567
4568	Int4::Int4(RValue<Int4> rhs) : XYZW(this)
4569	{
4570		storeValue(rhs.value);
4571	}
4572
4573	Int4::Int4(const Int4 &rhs) : XYZW(this)
4574	{
4575		Value *value = rhs.loadValue();
4576		storeValue(value);
4577	}
4578
4579	Int4::Int4(const Reference<Int4> &rhs) : XYZW(this)
4580	{
4581		Value *value = rhs.loadValue();
4582		storeValue(value);
4583	}
4584
4585	Int4::Int4(RValue<UInt4> rhs) : XYZW(this)
4586	{
4587		storeValue(rhs.value);
4588	}
4589
4590	Int4::Int4(const UInt4 &rhs) : XYZW(this)
4591	{
4592		Value *value = rhs.loadValue();
4593		storeValue(value);
4594	}
4595
4596	Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this)
4597	{
4598		Value *value = rhs.loadValue();
4599		storeValue(value);
4600	}
4601
4602	Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this)
4603	{
4604		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
4605		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4606
4607		storeValue(packed);
4608	}
4609
4610	Int4::Int4(RValue<Int> rhs) : XYZW(this)
4611	{
4612		Value *vector = loadValue();
4613		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
4614
4615		int swizzle[4] = {0, 0, 0, 0};
4616		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
4617
4618		storeValue(replicate);
4619	}
4620
4621	Int4::Int4(const Int &rhs) : XYZW(this)
4622	{
4623		*this = RValue<Int>(rhs.loadValue());
4624	}
4625
4626	Int4::Int4(const Reference<Int> &rhs) : XYZW(this)
4627	{
4628		*this = RValue<Int>(rhs.loadValue());
4629	}
4630
	// Stores rhs into this variable and returns rhs itself.
	RValue<Int4> Int4::operator=(RValue<Int4> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
4637
4638	RValue<Int4> Int4::operator=(const Int4 &rhs)
4639	{
4640		Value *value = rhs.loadValue();
4641		storeValue(value);
4642
4643		return RValue<Int4>(value);
4644	}
4645
4646	RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
4647	{
4648		Value *value = rhs.loadValue();
4649		storeValue(value);
4650
4651		return RValue<Int4>(value);
4652	}
4653
4654	RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
4655	{
4656		return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
4657	}
4658
4659	RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
4660	{
4661		return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
4662	}
4663
4664	RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
4665	{
4666		return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
4667	}
4668
4669	RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
4670	{
4671		return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
4672	}
4673
4674	RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
4675	{
4676		return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
4677	}
4678
4679	RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
4680	{
4681		return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
4682	}
4683
4684	RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
4685	{
4686		return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
4687	}
4688
4689	RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
4690	{
4691		return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
4692	}
4693
4694	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
4695	{
4696		return x86::pslld(lhs, rhs);
4697	}
4698
4699	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
4700	{
4701		return x86::psrad(lhs, rhs);
4702	}
4703
4704	RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
4705	{
4706		return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
4707	}
4708
4709	RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
4710	{
4711		return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
4712	}
4713
4714	RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
4715	{
4716		return lhs = lhs + rhs;
4717	}
4718
4719	RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
4720	{
4721		return lhs = lhs - rhs;
4722	}
4723
4724	RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
4725	{
4726		return lhs = lhs * rhs;
4727	}
4728
4729//	RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
4730//	{
4731//		return lhs = lhs / rhs;
4732//	}
4733
4734//	RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
4735//	{
4736//		return lhs = lhs % rhs;
4737//	}
4738
4739	RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
4740	{
4741		return lhs = lhs & rhs;
4742	}
4743
4744	RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
4745	{
4746		return lhs = lhs | rhs;
4747	}
4748
4749	RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
4750	{
4751		return lhs = lhs ^ rhs;
4752	}
4753
4754	RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
4755	{
4756		return lhs = lhs << rhs;
4757	}
4758
4759	RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
4760	{
4761		return lhs = lhs >> rhs;
4762	}
4763
	// Unary plus: returns val unchanged; no instruction is created here.
	RValue<Int4> operator+(RValue<Int4> val)
	{
		return val;
	}
4768
4769	RValue<Int4> operator-(RValue<Int4> val)
4770	{
4771		return RValue<Int4>(Nucleus::createNeg(val.value));
4772	}
4773
4774	RValue<Int4> operator~(RValue<Int4> val)
4775	{
4776		return RValue<Int4>(Nucleus::createNot(val.value));
4777	}
4778
	// Component-wise x == y as an integer mask. Because of the LLVM issue described below, the
	// opposite comparison (ICmpNE) is sign-extended and then inverted by XOR with Int4(0xFFFFFFFF).
	RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
	{
		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
	}
4786
	// Component-wise signed x < y as an integer mask. Because of the LLVM issue described below, the
	// opposite comparison (ICmpSGE) is sign-extended and then inverted by XOR with Int4(0xFFFFFFFF).
	RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
	{
		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
	}
4794
	// Component-wise signed x <= y as an integer mask. Because of the LLVM issue described below, the
	// opposite comparison (ICmpSGT) is sign-extended and then inverted by XOR with Int4(0xFFFFFFFF).
	RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
	{
		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
	}
4802
	// Component-wise x != y as an integer mask. Because of the LLVM issue described below, the
	// opposite comparison (ICmpEQ) is sign-extended and then inverted by XOR with Int4(0xFFFFFFFF).
	RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
	{
		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
	}
4810
4811	RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
4812	{
4813		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
4814		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
4815		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
4816		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
4817	}
4818
4819	RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
4820	{
4821		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
4822		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
4823		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
4824		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
4825	}
4826
4827	RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
4828	{
4829		if(CPUID::supportsSSE4_1())
4830		{
4831			return x86::pmaxsd(x, y);
4832		}
4833		else
4834		{
4835			RValue<Int4> greater = CmpNLE(x, y);
4836			return (x & greater) | (y & ~greater);
4837		}
4838	}
4839
4840	RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
4841	{
4842		if(CPUID::supportsSSE4_1())
4843		{
4844			return x86::pminsd(x, y);
4845		}
4846		else
4847		{
4848			RValue<Int4> less = CmpLT(x, y);
4849			return (x & less) | (y & ~less);
4850		}
4851	}
4852
	// Converts a Float4 to an Int4 by forwarding to x86::cvtps2dq.
	// NOTE(review): the rounding behaviour is whatever cvtps2dq provides
	// (presumably the current MXCSR rounding mode) -- confirm.
	RValue<Int4> RoundInt(RValue<Float4> cast)
	{
		return x86::cvtps2dq(cast);
	}
4857
	// Packs two Int4 vectors into a single Short8 by forwarding to x86::packssdw.
	RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
	{
		return x86::packssdw(x, y);
	}
4862
	// Packs two Int4 vectors into a single UShort8 by forwarding to x86::packusdw.
	// NOTE(review): no CPUID check is made here (unlike Max/Min); presumably
	// x86::packusdw deals with targets lacking the instruction -- confirm.
	RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
	{
		return x86::packusdw(x, y);
	}
4867
4868	RValue<Int> Extract(RValue<Int4> x, int i)
4869	{
4870		return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
4871	}
4872
4873	RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
4874	{
4875		return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
4876	}
4877
	// Produces an Int from an Int4 by reinterpreting it as Float4 and
	// forwarding to x86::movmskps.
	RValue<Int> SignMask(RValue<Int4> x)
	{
		return x86::movmskps(As<Float4>(x));
	}
4882
4883	RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
4884	{
4885		return RValue<Int4>(createSwizzle4(x.value, select));
4886	}
4887
4888	Type *Int4::getType()
4889	{
4890		return T(llvm::VectorType::get(T(Int::getType()), 4));
4891	}
4892
	// Default constructor: initializes XYZW with this object and stores no value.
	UInt4::UInt4() : XYZW(this)
	{
	}
4896
	// Converts a Float4 to UInt4 using only signed float-to-int conversions.
	// Lanes >= 2^31 are converted after subtracting 2^31, which is then added
	// back as an integer; lanes whose sign bit is set produce 0.
	UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
	{
		// Note: createFPToUI is broken, must perform conversion using createFPtoSI
		// Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());

		// Smallest positive value representable in UInt, but not in Int
		const unsigned int ustart = 0x80000000u;
		const float ustartf = float(ustart);

		// Check if the value can be represented as an Int
		// (the mask is set for lanes where cast >= ustartf, i.e. those that do NOT fit in an Int)
		Int4 uiValue = CmpNLT(cast, Float4(ustartf));
		// If the value is too large, subtract ustart and re-add it after conversion.
		uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
		// Otherwise, just convert normally
		          (~uiValue & Int4(cast));
		// If the value is negative, store 0, otherwise store the result of the conversion
		// (As<Int4>(cast) >> 31 is used as a per-lane mask derived from the float's sign bit)
		storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
	}
4915
	// Constant constructor: all four elements are set to xyzw.
	UInt4::UInt4(int xyzw) : XYZW(this)
	{
		constant(xyzw, xyzw, xyzw, xyzw);
	}
4920
	// Constant constructor: first element is x, the remaining three are yzw.
	UInt4::UInt4(int x, int yzw) : XYZW(this)
	{
		constant(x, yzw, yzw, yzw);
	}
4925
	// Constant constructor: elements are x, y, and zw for both of the last two.
	UInt4::UInt4(int x, int y, int zw) : XYZW(this)
	{
		constant(x, y, zw, zw);
	}
4930
	// Constant constructor: elements are x, y, z and w in that order.
	UInt4::UInt4(int x, int y, int z, int w) : XYZW(this)
	{
		constant(x, y, z, w);
	}
4935
4936	void UInt4::constant(int x, int y, int z, int w)
4937	{
4938		int64_t constantVector[4] = {x, y, z, w};
4939		storeValue(Nucleus::createConstantVector(constantVector, getType()));
4940	}
4941
	// Initializes this variable with the value carried by rhs.
	UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this)
	{
		storeValue(rhs.value);
	}
4946
4947	UInt4::UInt4(const UInt4 &rhs) : XYZW(this)
4948	{
4949		Value *value = rhs.loadValue();
4950		storeValue(value);
4951	}
4952
4953	UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this)
4954	{
4955		Value *value = rhs.loadValue();
4956		storeValue(value);
4957	}
4958
	// Initializes this variable from an Int4 value; the underlying value is
	// stored as-is, without any conversion instruction.
	UInt4::UInt4(RValue<Int4> rhs) : XYZW(this)
	{
		storeValue(rhs.value);
	}
4963
4964	UInt4::UInt4(const Int4 &rhs) : XYZW(this)
4965	{
4966		Value *value = rhs.loadValue();
4967		storeValue(value);
4968	}
4969
4970	UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this)
4971	{
4972		Value *value = rhs.loadValue();
4973		storeValue(value);
4974	}
4975
4976	UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this)
4977	{
4978		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
4979		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4980
4981		storeValue(packed);
4982	}
4983
	// Assigns rhs to this variable and returns rhs unchanged.
	RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
4990
4991	RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
4992	{
4993		Value *value = rhs.loadValue();
4994		storeValue(value);
4995
4996		return RValue<UInt4>(value);
4997	}
4998
4999	RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5000	{
5001		Value *value = rhs.loadValue();
5002		storeValue(value);
5003
5004		return RValue<UInt4>(value);
5005	}
5006
5007	RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5008	{
5009		return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5010	}
5011
5012	RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5013	{
5014		return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5015	}
5016
5017	RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5018	{
5019		return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5020	}
5021
5022	RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5023	{
5024		return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5025	}
5026
5027	RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5028	{
5029		return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5030	}
5031
5032	RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5033	{
5034		return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5035	}
5036
5037	RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5038	{
5039		return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5040	}
5041
5042	RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5043	{
5044		return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5045	}
5046
5047	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5048	{
5049		return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
5050	}
5051
	// Right shift of a UInt4 by an immediate count, forwarded to x86::psrld.
	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
	{
		return x86::psrld(lhs, rhs);
	}
5056
5057	RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5058	{
5059		return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5060	}
5061
5062	RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5063	{
5064		return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5065	}
5066
5067	RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5068	{
5069		return lhs = lhs + rhs;
5070	}
5071
5072	RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5073	{
5074		return lhs = lhs - rhs;
5075	}
5076
5077	RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5078	{
5079		return lhs = lhs * rhs;
5080	}
5081
5082//	RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5083//	{
5084//		return lhs = lhs / rhs;
5085//	}
5086
5087//	RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5088//	{
5089//		return lhs = lhs % rhs;
5090//	}
5091
5092	RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5093	{
5094		return lhs = lhs & rhs;
5095	}
5096
5097	RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5098	{
5099		return lhs = lhs | rhs;
5100	}
5101
5102	RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5103	{
5104		return lhs = lhs ^ rhs;
5105	}
5106
5107	RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5108	{
5109		return lhs = lhs << rhs;
5110	}
5111
5112	RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5113	{
5114		return lhs = lhs >> rhs;
5115	}
5116
	// Unary plus: returns the operand unchanged; no instruction is emitted.
	RValue<UInt4> operator+(RValue<UInt4> val)
	{
		return val;
	}
5121
5122	RValue<UInt4> operator-(RValue<UInt4> val)
5123	{
5124		return RValue<UInt4>(Nucleus::createNeg(val.value));
5125	}
5126
5127	RValue<UInt4> operator~(RValue<UInt4> val)
5128	{
5129		return RValue<UInt4>(Nucleus::createNot(val.value));
5130	}
5131
5132	RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5133	{
5134		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5135		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5136		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5137		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5138	}
5139
5140	RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5141	{
5142		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
5143	}
5144
5145	RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5146	{
5147		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5148		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5149		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
5150		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5151	}
5152
5153	RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5154	{
5155		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5156	}
5157
5158	RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5159	{
5160		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5161		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5162		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
5163		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5164	}
5165
5166	RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5167	{
5168		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
5169	}
5170
5171	RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5172	{
5173		if(CPUID::supportsSSE4_1())
5174		{
5175			return x86::pmaxud(x, y);
5176		}
5177		else
5178		{
5179			RValue<UInt4> greater = CmpNLE(x, y);
5180			return (x & greater) | (y & ~greater);
5181		}
5182	}
5183
5184	RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5185	{
5186		if(CPUID::supportsSSE4_1())
5187		{
5188			return x86::pminud(x, y);
5189		}
5190		else
5191		{
5192			RValue<UInt4> less = CmpLT(x, y);
5193			return (x & less) | (y & ~less);
5194		}
5195	}
5196
5197	Type *UInt4::getType()
5198	{
5199		return T(llvm::VectorType::get(T(UInt::getType()), 4));
5200	}
5201
5202	Float::Float(RValue<Int> cast)
5203	{
5204		Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5205
5206		storeValue(integer);
5207	}
5208
	// Converts a UInt to Float using a signed conversion of the low 31 bits.
	// The second term is the bit pattern of float(0x80000000u) masked by
	// (cast >> 31) on the Int reinterpretation, so 2^31 is added back when the
	// top bit of cast is set (assuming Int >> is an arithmetic shift -- confirm).
	Float::Float(RValue<UInt> cast)
	{
		RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
		                       As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));

		storeValue(result.value);
	}
5216
	// Initializes this variable with the floating-point constant x.
	Float::Float(float x)
	{
		storeValue(Nucleus::createConstantFloat(x));
	}
5221
	// Initializes this variable with the value carried by rhs.
	Float::Float(RValue<Float> rhs)
	{
		storeValue(rhs.value);
	}
5226
5227	Float::Float(const Float &rhs)
5228	{
5229		Value *value = rhs.loadValue();
5230		storeValue(value);
5231	}
5232
5233	Float::Float(const Reference<Float> &rhs)
5234	{
5235		Value *value = rhs.loadValue();
5236		storeValue(value);
5237	}
5238
	// Assigns rhs to this variable and returns rhs unchanged.
	RValue<Float> Float::operator=(RValue<Float> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
5245
5246	RValue<Float> Float::operator=(const Float &rhs)
5247	{
5248		Value *value = rhs.loadValue();
5249		storeValue(value);
5250
5251		return RValue<Float>(value);
5252	}
5253
5254	RValue<Float> Float::operator=(const Reference<Float> &rhs)
5255	{
5256		Value *value = rhs.loadValue();
5257		storeValue(value);
5258
5259		return RValue<Float>(value);
5260	}
5261
5262	RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5263	{
5264		return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5265	}
5266
5267	RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5268	{
5269		return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5270	}
5271
5272	RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5273	{
5274		return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5275	}
5276
5277	RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5278	{
5279		return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
5280	}
5281
5282	RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5283	{
5284		return lhs = lhs + rhs;
5285	}
5286
5287	RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5288	{
5289		return lhs = lhs - rhs;
5290	}
5291
5292	RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5293	{
5294		return lhs = lhs * rhs;
5295	}
5296
5297	RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5298	{
5299		return lhs = lhs / rhs;
5300	}
5301
	// Unary plus: returns the operand unchanged; no instruction is emitted.
	RValue<Float> operator+(RValue<Float> val)
	{
		return val;
	}
5306
5307	RValue<Float> operator-(RValue<Float> val)
5308	{
5309		return RValue<Float>(Nucleus::createFNeg(val.value));
5310	}
5311
5312	RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5313	{
5314		return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5315	}
5316
5317	RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5318	{
5319		return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5320	}
5321
5322	RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5323	{
5324		return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5325	}
5326
5327	RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5328	{
5329		return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5330	}
5331
5332	RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5333	{
5334		return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5335	}
5336
5337	RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5338	{
5339		return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
5340	}
5341
5342	RValue<Float> Abs(RValue<Float> x)
5343	{
5344		return IfThenElse(x > 0.0f, x, -x);
5345	}
5346
	// Returns x when x > y, otherwise y. Because the comparison is false
	// whenever it cannot establish x > y, y is the result in that case.
	RValue<Float> Max(RValue<Float> x, RValue<Float> y)
	{
		return IfThenElse(x > y, x, y);
	}
5351
	// Returns x when x < y, otherwise y. Because the comparison is false
	// whenever it cannot establish x < y, y is the result in that case.
	RValue<Float> Min(RValue<Float> x, RValue<Float> y)
	{
		return IfThenElse(x < y, x, y);
	}
5356
5357	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5358	{
5359		#if defined(__i386__) || defined(__x86_64__)
5360			if(exactAtPow2)
5361			{
5362				// rcpss uses a piecewise-linear approximation which minimizes the relative error
5363				// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
5364				return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
5365			}
5366		#endif
5367
5368		return x86::rcpss(x);
5369	}
5370
	// Reciprocal square root of x, forwarded to x86::rsqrtss.
	RValue<Float> RcpSqrt_pp(RValue<Float> x)
	{
		return x86::rsqrtss(x);
	}
5375
	// Square root of x, forwarded to x86::sqrtss.
	RValue<Float> Sqrt(RValue<Float> x)
	{
		return x86::sqrtss(x);
	}
5380
5381	RValue<Float> Round(RValue<Float> x)
5382	{
5383		if(CPUID::supportsSSE4_1())
5384		{
5385			return x86::roundss(x, 0);
5386		}
5387		else
5388		{
5389			return Float4(Round(Float4(x))).x;
5390		}
5391	}
5392
5393	RValue<Float> Trunc(RValue<Float> x)
5394	{
5395		if(CPUID::supportsSSE4_1())
5396		{
5397			return x86::roundss(x, 3);
5398		}
5399		else
5400		{
5401			return Float(Int(x));   // Rounded toward zero
5402		}
5403	}
5404
5405	RValue<Float> Frac(RValue<Float> x)
5406	{
5407		if(CPUID::supportsSSE4_1())
5408		{
5409			return x - x86::floorss(x);
5410		}
5411		else
5412		{
5413			return Float4(Frac(Float4(x))).x;
5414		}
5415	}
5416
5417	RValue<Float> Floor(RValue<Float> x)
5418	{
5419		if(CPUID::supportsSSE4_1())
5420		{
5421			return x86::floorss(x);
5422		}
5423		else
5424		{
5425			return Float4(Floor(Float4(x))).x;
5426		}
5427	}
5428
5429	RValue<Float> Ceil(RValue<Float> x)
5430	{
5431		if(CPUID::supportsSSE4_1())
5432		{
5433			return x86::ceilss(x);
5434		}
5435		else
5436		{
5437			return Float4(Ceil(Float4(x))).x;
5438		}
5439	}
5440
5441	Type *Float::getType()
5442	{
5443		return T(llvm::Type::getFloatTy(*::context));
5444	}
5445
	// Initializes this Float2 with the Float4 value bitcast to Float2's type.
	Float2::Float2(RValue<Float4> cast)
	{
		storeValue(Nucleus::createBitCast(cast.value, getType()));
	}
5450
5451	Type *Float2::getType()
5452	{
5453		return T(Type_v2f32);
5454	}
5455
5456	Float4::Float4(RValue<Byte4> cast) : XYZW(this)
5457	{
5458		Value *a = Int4(cast).loadValue();
5459		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5460
5461		storeValue(xyzw);
5462	}
5463
5464	Float4::Float4(RValue<SByte4> cast) : XYZW(this)
5465	{
5466		Value *a = Int4(cast).loadValue();
5467		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5468
5469		storeValue(xyzw);
5470	}
5471
5472	Float4::Float4(RValue<Short4> cast) : XYZW(this)
5473	{
5474		Int4 c(cast);
5475		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5476	}
5477
5478	Float4::Float4(RValue<UShort4> cast) : XYZW(this)
5479	{
5480		Int4 c(cast);
5481		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5482	}
5483
5484	Float4::Float4(RValue<Int4> cast) : XYZW(this)
5485	{
5486		Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
5487
5488		storeValue(xyzw);
5489	}
5490
	// Converts a UInt4 to Float4 using a signed conversion of the low 31 bits.
	// The second term is the bit pattern of float(0x80000000u) masked by
	// (cast >> 31) on the Int4 reinterpretation, so 2^31 is added back in lanes
	// whose top bit is set (assuming Int4 >> is an arithmetic shift -- confirm).
	Float4::Float4(RValue<UInt4> cast) : XYZW(this)
	{
		RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
		                        As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));

		storeValue(result.value);
	}
5498
	// Default constructor: initializes XYZW with this object and stores no value.
	Float4::Float4() : XYZW(this)
	{
	}
5502
	// Constant constructor: all four elements are set to xyzw.
	Float4::Float4(float xyzw) : XYZW(this)
	{
		constant(xyzw, xyzw, xyzw, xyzw);
	}
5507
	// Constant constructor: first element is x, the remaining three are yzw.
	Float4::Float4(float x, float yzw) : XYZW(this)
	{
		constant(x, yzw, yzw, yzw);
	}
5512
	// Constant constructor: elements are x, y, and zw for both of the last two.
	Float4::Float4(float x, float y, float zw) : XYZW(this)
	{
		constant(x, y, zw, zw);
	}
5517
	// Constant constructor: elements are x, y, z and w in that order.
	Float4::Float4(float x, float y, float z, float w) : XYZW(this)
	{
		constant(x, y, z, w);
	}
5522
5523	void Float4::constant(float x, float y, float z, float w)
5524	{
5525		double constantVector[4] = {x, y, z, w};
5526		storeValue(Nucleus::createConstantVector(constantVector, getType()));
5527	}
5528
	// Initializes this variable with the value carried by rhs.
	Float4::Float4(RValue<Float4> rhs) : XYZW(this)
	{
		storeValue(rhs.value);
	}
5533
5534	Float4::Float4(const Float4 &rhs) : XYZW(this)
5535	{
5536		Value *value = rhs.loadValue();
5537		storeValue(value);
5538	}
5539
5540	Float4::Float4(const Reference<Float4> &rhs) : XYZW(this)
5541	{
5542		Value *value = rhs.loadValue();
5543		storeValue(value);
5544	}
5545
5546	Float4::Float4(RValue<Float> rhs) : XYZW(this)
5547	{
5548		Value *vector = loadValue();
5549		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5550
5551		int swizzle[4] = {0, 0, 0, 0};
5552		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5553
5554		storeValue(replicate);
5555	}
5556
5557	Float4::Float4(const Float &rhs) : XYZW(this)
5558	{
5559		*this = RValue<Float>(rhs.loadValue());
5560	}
5561
5562	Float4::Float4(const Reference<Float> &rhs) : XYZW(this)
5563	{
5564		*this = RValue<Float>(rhs.loadValue());
5565	}
5566
	// Assigns the constant x to all four elements and returns the assigned value.
	RValue<Float4> Float4::operator=(float x)
	{
		return *this = Float4(x, x, x, x);
	}
5571
	// Assigns rhs to this variable and returns rhs unchanged.
	RValue<Float4> Float4::operator=(RValue<Float4> rhs)
	{
		storeValue(rhs.value);

		return rhs;
	}
5578
5579	RValue<Float4> Float4::operator=(const Float4 &rhs)
5580	{
5581		Value *value = rhs.loadValue();
5582		storeValue(value);
5583
5584		return RValue<Float4>(value);
5585	}
5586
5587	RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
5588	{
5589		Value *value = rhs.loadValue();
5590		storeValue(value);
5591
5592		return RValue<Float4>(value);
5593	}
5594
	// Assigns a Float4 constructed from the scalar rhs and returns the assigned value.
	RValue<Float4> Float4::operator=(RValue<Float> rhs)
	{
		return *this = Float4(rhs);
	}
5599
	// Assigns a Float4 constructed from the scalar variable rhs and returns the assigned value.
	RValue<Float4> Float4::operator=(const Float &rhs)
	{
		return *this = Float4(rhs);
	}
5604
	// Assigns a Float4 constructed from the scalar reference rhs and returns the assigned value.
	RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
	{
		return *this = Float4(rhs);
	}
5609
5610	RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
5611	{
5612		return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
5613	}
5614
5615	RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
5616	{
5617		return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
5618	}
5619
5620	RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
5621	{
5622		return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
5623	}
5624
5625	RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
5626	{
5627		return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
5628	}
5629
5630	RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
5631	{
5632		return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
5633	}
5634
5635	RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
5636	{
5637		return lhs = lhs + rhs;
5638	}
5639
5640	RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
5641	{
5642		return lhs = lhs - rhs;
5643	}
5644
5645	RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
5646	{
5647		return lhs = lhs * rhs;
5648	}
5649
5650	RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
5651	{
5652		return lhs = lhs / rhs;
5653	}
5654
5655	RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
5656	{
5657		return lhs = lhs % rhs;
5658	}
5659
	// Unary plus: returns the operand unchanged; no instruction is emitted.
	RValue<Float4> operator+(RValue<Float4> val)
	{
		return val;
	}
5664
5665	RValue<Float4> operator-(RValue<Float4> val)
5666	{
5667		return RValue<Float4>(Nucleus::createFNeg(val.value));
5668	}
5669
5670	RValue<Float4> Abs(RValue<Float4> x)
5671	{
5672		Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
5673		int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
5674		Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
5675
5676		return As<Float4>(result);
5677	}
5678
	// Maximum of two Float4 vectors, forwarded to x86::maxps.
	RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
	{
		return x86::maxps(x, y);
	}
5683
	// Minimum of two Float4 vectors, forwarded to x86::minps.
	RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
	{
		return x86::minps(x, y);
	}
5688
5689	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
5690	{
5691		#if defined(__i386__) || defined(__x86_64__)
5692			if(exactAtPow2)
5693			{
5694				// rcpps uses a piecewise-linear approximation which minimizes the relative error
5695				// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
5696				return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
5697			}
5698		#endif
5699
5700		return x86::rcpps(x);
5701	}
5702
	// Reciprocal square root of x, forwarded to x86::rsqrtps.
	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
	{
		return x86::rsqrtps(x);
	}
5707
	// Square root of x, forwarded to x86::sqrtps.
	RValue<Float4> Sqrt(RValue<Float4> x)
	{
		return x86::sqrtps(x);
	}
5712
5713	RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
5714	{
5715		return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
5716	}
5717
5718	RValue<Float> Extract(RValue<Float4> x, int i)
5719	{
5720		return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
5721	}
5722
5723	RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
5724	{
5725		return RValue<Float4>(createSwizzle4(x.value, select));
5726	}
5727
5728	RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
5729	{
5730		int shuffle[4] =
5731		{
5732			((imm >> 0) & 0x03) + 0,
5733			((imm >> 2) & 0x03) + 0,
5734			((imm >> 4) & 0x03) + 4,
5735			((imm >> 6) & 0x03) + 4,
5736		};
5737
5738		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5739	}
5740
5741	RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
5742	{
5743		int shuffle[4] = {0, 4, 1, 5};
5744		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5745	}
5746
5747	RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
5748	{
5749		int shuffle[4] = {2, 6, 3, 7};
5750		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5751	}
5752
5753	RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
5754	{
5755		Value *vector = lhs.loadValue();
5756		Value *result = createMask4(vector, rhs.value, select);
5757		lhs.storeValue(result);
5758
5759		return RValue<Float4>(result);
5760	}
5761
	// Produces an Int from a Float4 by forwarding to x86::movmskps.
	RValue<Int> SignMask(RValue<Float4> x)
	{
		return x86::movmskps(x);
	}
5766
5767	RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
5768	{
5769	//	return As<Int4>(x86::cmpeqps(x, y));
5770		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
5771	}
5772
5773	RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
5774	{
5775	//	return As<Int4>(x86::cmpltps(x, y));
5776		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
5777	}
5778
5779	RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
5780	{
5781	//	return As<Int4>(x86::cmpleps(x, y));
5782		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
5783	}
5784
5785	RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
5786	{
5787	//	return As<Int4>(x86::cmpneqps(x, y));
5788		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
5789	}
5790
5791	RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
5792	{
5793	//	return As<Int4>(x86::cmpnltps(x, y));
5794		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
5795	}
5796
5797	RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
5798	{
5799	//	return As<Int4>(x86::cmpnleps(x, y));
5800		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
5801	}
5802
5803	RValue<Int4> IsInf(RValue<Float4> x)
5804	{
5805		return CmpEQ(As<Int4>(x) & Int4(0x7FFFFFFF), Int4(0x7F800000));
5806	}
5807
5808	RValue<Int4> IsNan(RValue<Float4> x)
5809	{
5810		return ~CmpEQ(x, x);
5811	}
5812
5813	RValue<Float4> Round(RValue<Float4> x)
5814	{
5815		if(CPUID::supportsSSE4_1())
5816		{
5817			return x86::roundps(x, 0);
5818		}
5819		else
5820		{
5821			return Float4(RoundInt(x));
5822		}
5823	}
5824
5825	RValue<Float4> Trunc(RValue<Float4> x)
5826	{
5827		if(CPUID::supportsSSE4_1())
5828		{
5829			return x86::roundps(x, 3);
5830		}
5831		else
5832		{
5833			return Float4(Int4(x));
5834		}
5835	}
5836
	// Fractional part of x, clamped to the largest float below 1.0.
	// With SSE4.1 it is x - Floor(x); otherwise it is derived from the
	// truncating Int4 conversion and corrected for negative inputs.
	RValue<Float4> Frac(RValue<Float4> x)
	{
		Float4 frc;

		if(CPUID::supportsSSE4_1())
		{
			frc = x - Floor(x);
		}
		else
		{
			frc = x - Float4(Int4(x));   // Signed fractional part.

			// The comparison mask selects the bit pattern of 1.0f in elements where 0.0 > frc.
			frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f)));   // Add 1.0 if negative.
		}

		// x - floor(x) can be 1.0 for very small negative x.
		// Clamp against the value just below 1.0.
		return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
	}
5856
5857	RValue<Float4> Floor(RValue<Float4> x)
5858	{
5859		if(CPUID::supportsSSE4_1())
5860		{
5861			return x86::floorps(x);
5862		}
5863		else
5864		{
5865			return x - Frac(x);
5866		}
5867	}
5868
5869	RValue<Float4> Ceil(RValue<Float4> x)
5870	{
5871		if(CPUID::supportsSSE4_1())
5872		{
5873			return x86::ceilps(x);
5874		}
5875		else
5876		{
5877			return -Floor(-x);
5878		}
5879	}
5880
5881	Type *Float4::getType()
5882	{
5883		return T(llvm::VectorType::get(T(Float::getType()), 4));
5884	}
5885
5886	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
5887	{
5888		return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
5889	}
5890
5891	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
5892	{
5893		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
5894	}
5895
5896	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
5897	{
5898		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
5899	}
5900
5901	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
5902	{
5903		return lhs = lhs + offset;
5904	}
5905
5906	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
5907	{
5908		return lhs = lhs + offset;
5909	}
5910
5911	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
5912	{
5913		return lhs = lhs + offset;
5914	}
5915
5916	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
5917	{
5918		return lhs + -offset;
5919	}
5920
5921	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
5922	{
5923		return lhs + -offset;
5924	}
5925
5926	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
5927	{
5928		return lhs + -offset;
5929	}
5930
5931	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
5932	{
5933		return lhs = lhs - offset;
5934	}
5935
5936	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
5937	{
5938		return lhs = lhs - offset;
5939	}
5940
5941	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
5942	{
5943		return lhs = lhs - offset;
5944	}
5945
5946	void Return()
5947	{
5948		Nucleus::createRetVoid();
5949		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
5950		Nucleus::createUnreachable();
5951	}
5952
5953	void Return(RValue<Int> ret)
5954	{
5955		Nucleus::createRet(ret.value);
5956		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
5957		Nucleus::createUnreachable();
5958	}
5959
	// Emits a conditional branch on cmp with bodyBB and endBB as its two targets,
	// then makes bodyBB the insertion point for subsequently emitted code.
	void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
	{
		Nucleus::createCondBr(cmp.value, bodyBB, endBB);
		Nucleus::setInsertBlock(bodyBB);
	}
5965
5966	RValue<Long> Ticks()
5967	{
5968		llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
5969
5970		return RValue<Long>(V(::builder->CreateCall(rdtsc)));
5971	}
5972}
5973
5974namespace sw
5975{
5976	namespace x86
5977	{
5978		RValue<Int> cvtss2si(RValue<Float> val)
5979		{
5980			llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
5981
5982			Float4 vector;
5983			vector.x = val;
5984
5985			return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
5986		}
5987
5988		RValue<Int4> cvtps2dq(RValue<Float4> val)
5989		{
5990			llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
5991
5992			return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
5993		}
5994
5995		RValue<Float> rcpss(RValue<Float> val)
5996		{
5997			llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
5998
5999			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
6000
6001			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0));
6002		}
6003
6004		RValue<Float> sqrtss(RValue<Float> val)
6005		{
6006			llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
6007
6008			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
6009
6010			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0));
6011		}
6012
6013		RValue<Float> rsqrtss(RValue<Float> val)
6014		{
6015			llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
6016
6017			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
6018
6019			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0));
6020		}
6021
6022		RValue<Float4> rcpps(RValue<Float4> val)
6023		{
6024			llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
6025
6026			return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
6027		}
6028
6029		RValue<Float4> sqrtps(RValue<Float4> val)
6030		{
6031			llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
6032
6033			return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
6034		}
6035
6036		RValue<Float4> rsqrtps(RValue<Float4> val)
6037		{
6038			llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
6039
6040			return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
6041		}
6042
6043		RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
6044		{
6045			llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
6046
6047			return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
6048		}
6049
6050		RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
6051		{
6052			llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
6053
6054			return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
6055		}
6056
6057		RValue<Float> roundss(RValue<Float> val, unsigned char imm)
6058		{
6059			llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
6060
6061			Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
6062			Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
6063
6064			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0));
6065		}
6066
6067		RValue<Float> floorss(RValue<Float> val)
6068		{
6069			return roundss(val, 1);
6070		}
6071
6072		RValue<Float> ceilss(RValue<Float> val)
6073		{
6074			return roundss(val, 2);
6075		}
6076
6077		RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
6078		{
6079			llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
6080
6081			return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
6082		}
6083
6084		RValue<Float4> floorps(RValue<Float4> val)
6085		{
6086			return roundps(val, 1);
6087		}
6088
6089		RValue<Float4> ceilps(RValue<Float4> val)
6090		{
6091			return roundps(val, 2);
6092		}
6093
6094		RValue<Int4> pabsd(RValue<Int4> x)
6095		{
6096			llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
6097
6098			return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
6099		}
6100
6101		RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
6102		{
6103			llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
6104
6105			return As<Short4>(V(::builder->CreateCall2(paddsw, x.value, y.value)));
6106		}
6107
6108		RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
6109		{
6110			llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
6111
6112			return As<Short4>(V(::builder->CreateCall2(psubsw, x.value, y.value)));
6113		}
6114
6115		RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
6116		{
6117			llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
6118
6119			return As<UShort4>(V(::builder->CreateCall2(paddusw, x.value, y.value)));
6120		}
6121
6122		RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
6123		{
6124			llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
6125
6126			return As<UShort4>(V(::builder->CreateCall2(psubusw, x.value, y.value)));
6127		}
6128
6129		RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
6130		{
6131			llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
6132
6133			return As<SByte8>(V(::builder->CreateCall2(paddsb, x.value, y.value)));
6134		}
6135
6136		RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
6137		{
6138			llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
6139
6140			return As<SByte8>(V(::builder->CreateCall2(psubsb, x.value, y.value)));
6141		}
6142
6143		RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
6144		{
6145			llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
6146
6147			return As<Byte8>(V(::builder->CreateCall2(paddusb, x.value, y.value)));
6148		}
6149
6150		RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
6151		{
6152			llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
6153
6154			return As<Byte8>(V(::builder->CreateCall2(psubusb, x.value, y.value)));
6155		}
6156
6157		RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
6158		{
6159			llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
6160
6161			return As<UShort4>(V(::builder->CreateCall2(pavgw, x.value, y.value)));
6162		}
6163
6164		RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
6165		{
6166			llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
6167
6168			return As<Short4>(V(::builder->CreateCall2(pmaxsw, x.value, y.value)));
6169		}
6170
6171		RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
6172		{
6173			llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
6174
6175			return As<Short4>(V(::builder->CreateCall2(pminsw, x.value, y.value)));
6176		}
6177
6178		RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
6179		{
6180			llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
6181
6182			return As<Short4>(V(::builder->CreateCall2(pcmpgtw, x.value, y.value)));
6183		}
6184
6185		RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
6186		{
6187			llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
6188
6189			return As<Short4>(V(::builder->CreateCall2(pcmpeqw, x.value, y.value)));
6190		}
6191
6192		RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
6193		{
6194			llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
6195
6196			return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, x.value, y.value)));
6197		}
6198
6199		RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
6200		{
6201			llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
6202
6203			return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, x.value, y.value)));
6204		}
6205
6206		RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
6207		{
6208			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
6209
6210			return As<Short4>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
6211		}
6212
6213		RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
6214		{
6215			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
6216
6217			return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
6218		}
6219
6220		RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
6221		{
6222			llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
6223
6224			return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value)));
6225		}
6226
6227		RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
6228		{
6229			llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
6230
6231			return As<Byte8>(V(::builder->CreateCall2(packuswb, x.value, y.value)));
6232		}
6233
6234		RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
6235		{
6236			if(CPUID::supportsSSE4_1())
6237			{
6238				llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
6239
6240				return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
6241			}
6242			else
6243			{
6244				RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
6245				RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
6246
6247				return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
6248			}
6249		}
6250
6251		RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
6252		{
6253			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
6254
6255			return As<UShort4>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
6256		}
6257
6258		RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
6259		{
6260			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
6261
6262			return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
6263		}
6264
6265		RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
6266		{
6267			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
6268
6269			return As<Short4>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
6270		}
6271
6272		RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
6273		{
6274			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
6275
6276			return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
6277		}
6278
6279		RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
6280		{
6281			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
6282
6283			return As<Short4>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
6284		}
6285
6286		RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
6287		{
6288			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
6289
6290			return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
6291		}
6292
6293		RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
6294		{
6295			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
6296
6297			return As<Int2>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
6298		}
6299
6300		RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
6301		{
6302			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
6303
6304			return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
6305		}
6306
6307		RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
6308		{
6309			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
6310
6311			return As<Int2>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
6312		}
6313
6314		RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
6315		{
6316			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
6317
6318			return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
6319		}
6320
6321		RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
6322		{
6323			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
6324
6325			return As<UInt2>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
6326		}
6327
6328		RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
6329		{
6330			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
6331
6332			return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
6333		}
6334
6335		RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
6336		{
6337			llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
6338
6339			return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
6340		}
6341
6342		RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
6343		{
6344			llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
6345
6346			return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
6347		}
6348
6349		RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
6350		{
6351			llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
6352
6353			return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
6354		}
6355
6356		RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
6357		{
6358			llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
6359
6360			return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
6361		}
6362
6363		RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
6364		{
6365			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
6366
6367			return As<Short4>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
6368		}
6369
6370		RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
6371		{
6372			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
6373
6374			return As<UShort4>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
6375		}
6376
6377		RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
6378		{
6379			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
6380
6381			return As<Int2>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
6382		}
6383
6384		RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
6385		{
6386			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
6387
6388			return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
6389		}
6390
6391		RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
6392		{
6393			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
6394
6395			return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
6396		}
6397
6398		RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
6399		{
6400			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
6401
6402			return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
6403		}
6404
6405		RValue<Int> movmskps(RValue<Float4> x)
6406		{
6407			llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
6408
6409			return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
6410		}
6411
6412		RValue<Int> pmovmskb(RValue<Byte8> x)
6413		{
6414			llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
6415
6416			return RValue<Int>(V(::builder->CreateCall(pmovmskb, x.value))) & 0xFF;
6417		}
6418
6419		RValue<Int4> pmovzxbd(RValue<Byte16> x)
6420		{
6421			llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
6422
6423			return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, x.value)));
6424		}
6425
6426		RValue<Int4> pmovsxbd(RValue<SByte16> x)
6427		{
6428			llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
6429
6430			return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, x.value)));
6431		}
6432
6433		RValue<Int4> pmovzxwd(RValue<UShort8> x)
6434		{
6435			llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
6436
6437			return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, x.value)));
6438		}
6439
6440		RValue<Int4> pmovsxwd(RValue<Short8> x)
6441		{
6442			llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
6443
6444			return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, x.value)));
6445		}
6446	}
6447}
6448