1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Blitter.hpp"
16
17#include "Shader/ShaderCore.hpp"
18#include "Reactor/Reactor.hpp"
19#include "Common/Memory.hpp"
20#include "Common/Debug.hpp"
21
22namespace sw
23{
24	Blitter::Blitter()
25	{
26		blitCache = new RoutineCache<State>(1024);
27	}
28
29	Blitter::~Blitter()
30	{
31		delete blitCache;
32	}
33
34	void Blitter::clear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
35	{
36		if(fastClear(pixel, format, dest, dRect, rgbaMask))
37		{
38			return;
39		}
40
41		sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
42		SliceRectF sRect((float)dRect.x0, (float)dRect.y0, (float)dRect.x1, (float)dRect.y1, 0);
43		blit(color, sRect, dest, dRect, {rgbaMask});
44		delete color;
45	}
46
47	bool Blitter::fastClear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
48	{
49		if(format != FORMAT_A32B32G32R32F)
50		{
51			return false;
52		}
53
54		float *color = (float*)pixel;
55		float r = color[0];
56		float g = color[1];
57		float b = color[2];
58		float a = color[3];
59
60		uint32_t packed;
61
62		switch(dest->getFormat())
63		{
64		case FORMAT_R5G6B5:
65			if((rgbaMask & 0x7) != 0x7) return false;
66			packed = ((uint16_t)(31 * b + 0.5f) << 0) |
67			         ((uint16_t)(63 * g + 0.5f) << 5) |
68			         ((uint16_t)(31 * r + 0.5f) << 11);
69			break;
70		case FORMAT_X8B8G8R8:
71			if((rgbaMask & 0x7) != 0x7) return false;
72			packed = ((uint32_t)(255) << 24) |
73			         ((uint32_t)(255 * b + 0.5f) << 16) |
74			         ((uint32_t)(255 * g + 0.5f) << 8) |
75			         ((uint32_t)(255 * r + 0.5f) << 0);
76			break;
77		case FORMAT_A8B8G8R8:
78			if((rgbaMask & 0xF) != 0xF) return false;
79			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
80			         ((uint32_t)(255 * b + 0.5f) << 16) |
81			         ((uint32_t)(255 * g + 0.5f) << 8) |
82			         ((uint32_t)(255 * r + 0.5f) << 0);
83			break;
84		case FORMAT_X8R8G8B8:
85			if((rgbaMask & 0x7) != 0x7) return false;
86			packed = ((uint32_t)(255) << 24) |
87			         ((uint32_t)(255 * r + 0.5f) << 16) |
88			         ((uint32_t)(255 * g + 0.5f) << 8) |
89			         ((uint32_t)(255 * b + 0.5f) << 0);
90			break;
91		case FORMAT_A8R8G8B8:
92			if((rgbaMask & 0xF) != 0xF) return false;
93			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
94			         ((uint32_t)(255 * r + 0.5f) << 16) |
95			         ((uint32_t)(255 * g + 0.5f) << 8) |
96			         ((uint32_t)(255 * b + 0.5f) << 0);
97			break;
98		default:
99			return false;
100		}
101
102		bool useDestInternal = !dest->isExternalDirty();
103		uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal);
104
105		for(int j = 0; j < dest->getSamples(); j++)
106		{
107			uint8_t *d = slice;
108
109			switch(Surface::bytes(dest->getFormat()))
110			{
111			case 2:
112				for(int i = dRect.y0; i < dRect.y1; i++)
113				{
114					sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0);
115					d += dest->getPitchB(useDestInternal);
116				}
117				break;
118			case 4:
119				for(int i = dRect.y0; i < dRect.y1; i++)
120				{
121					sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
122					d += dest->getPitchB(useDestInternal);
123				}
124				break;
125			default:
126				assert(false);
127			}
128
129			slice += dest->getSliceB(useDestInternal);
130		}
131
132		dest->unlock(useDestInternal);
133
134		return true;
135	}
136
137	void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
138	{
139		if(dest->getInternalFormat() == FORMAT_NULL)
140		{
141			return;
142		}
143
144		if(blitReactor(source, sourceRect, dest, destRect, options))
145		{
146			return;
147		}
148
149		SliceRectF sRect = sourceRect;
150		SliceRect dRect = destRect;
151
152		bool flipX = destRect.x0 > destRect.x1;
153		bool flipY = destRect.y0 > destRect.y1;
154
155		if(flipX)
156		{
157			swap(dRect.x0, dRect.x1);
158			swap(sRect.x0, sRect.x1);
159		}
160		if(flipY)
161		{
162			swap(dRect.y0, dRect.y1);
163			swap(sRect.y0, sRect.y1);
164		}
165
166		source->lockInternal((int)sRect.x0, (int)sRect.y0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
167		dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
168
169		float w = sRect.width() / dRect.width();
170		float h = sRect.height() / dRect.height();
171
172		const float xStart = sRect.x0 + 0.5f * w;
173		float y = sRect.y0 + 0.5f * h;
174
175		for(int j = dRect.y0; j < dRect.y1; j++)
176		{
177			float x = xStart;
178
179			for(int i = dRect.x0; i < dRect.x1; i++)
180			{
181				// FIXME: Support RGBA mask
182				dest->copyInternal(source, i, j, x, y, options.filter);
183
184				x += w;
185			}
186
187			y += h;
188		}
189
190		source->unlockInternal();
191		dest->unlockInternal();
192	}
193
194	void Blitter::blit3D(Surface *source, Surface *dest)
195	{
196		source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
197		dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
198
199		float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
200		float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
201		float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
202
203		float z = 0.5f * d;
204		for(int k = 0; k < dest->getDepth(); k++)
205		{
206			float y = 0.5f * h;
207			for(int j = 0; j < dest->getHeight(); j++)
208			{
209				float x = 0.5f * w;
210				for(int i = 0; i < dest->getWidth(); i++)
211				{
212					dest->copyInternal(source, i, j, k, x, y, z, true);
213					x += w;
214				}
215				y += h;
216			}
217			z += d;
218		}
219
220		source->unlockInternal();
221		dest->unlockInternal();
222	}
223
	// Loads one element of state.sourceFormat from 'element' into 'c' as four
	// floats, using Reactor types (Float4, Pointer<Byte>).
	//
	// 'c' starts out as (0, 0, 0, 1); components the format does not store keep
	// those defaults unless a case overwrites them. Integer and normalized
	// components are converted to float as-is, without dividing by the component's
	// maximum. For many formats without an alpha channel, w is instead set to the
	// largest value of the component type (0xFF, 0x7F, 0xFFFF, ...), presumably so
	// that a later per-format scale turns it into 1.0 - confirm against the caller.
	//
	// Returns false when state.sourceFormat has no case in the switch, true otherwise.
224	bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state)
225	{
226		c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
227
228		switch(state.sourceFormat)
229		{
230		case FORMAT_L8:
			// Luminance is replicated into x, y and z.
231			c.xyz = Float(Int(*Pointer<Byte>(element)));
232			c.w = float(0xFF);
233			break;
234		case FORMAT_A8:
			// Alpha only; x, y and z stay 0.
235			c.w = Float(Int(*Pointer<Byte>(element)));
236			break;
237		case FORMAT_R8I:
238		case FORMAT_R8_SNORM:
239			c.x = Float(Int(*Pointer<SByte>(element)));
240			c.w = float(0x7F);
241			break;
242		case FORMAT_R8:
243		case FORMAT_R8UI:
244			c.x = Float(Int(*Pointer<Byte>(element)));
245			c.w = float(0xFF);
246			break;
247		case FORMAT_R16I:
248			c.x = Float(Int(*Pointer<Short>(element)));
249			c.w = float(0x7FFF);
250			break;
251		case FORMAT_R16UI:
252			c.x = Float(Int(*Pointer<UShort>(element)));
253			c.w = float(0xFFFF);
254			break;
255		case FORMAT_R32I:
256			c.x = Float(*Pointer<Int>(element));
257			c.w = float(0x7FFFFFFF);
258			break;
259		case FORMAT_R32UI:
260			c.x = Float(*Pointer<UInt>(element));
261			c.w = float(0xFFFFFFFF);
262			break;
263		case FORMAT_A8R8G8B8:
			// The zyxw swizzle exchanges bytes 0 and 2, so byte 2 ends up in x and byte 0 in z.
264			c = Float4(*Pointer<Byte4>(element)).zyxw;
265			break;
266		case FORMAT_A8B8G8R8I:
267		case FORMAT_A8B8G8R8_SNORM:
268			c = Float4(*Pointer<SByte4>(element));
269			break;
270		case FORMAT_A8B8G8R8:
271		case FORMAT_A8B8G8R8UI:
272		case FORMAT_SRGB8_A8:
273			c = Float4(*Pointer<Byte4>(element));
274			break;
275		case FORMAT_X8R8G8B8:
			// Same byte swap as FORMAT_A8R8G8B8; the stored fourth byte is replaced by 0xFF.
276			c = Float4(*Pointer<Byte4>(element)).zyxw;
277			c.w = float(0xFF);
278			break;
279		case FORMAT_R8G8B8:
			// Byte 0 goes to z and byte 2 to x.
280			c.z = Float(Int(*Pointer<Byte>(element + 0)));
281			c.y = Float(Int(*Pointer<Byte>(element + 1)));
282			c.x = Float(Int(*Pointer<Byte>(element + 2)));
283			c.w = float(0xFF);
284			break;
285		case FORMAT_B8G8R8:
			// Byte 0 goes to x and byte 2 to z.
286			c.x = Float(Int(*Pointer<Byte>(element + 0)));
287			c.y = Float(Int(*Pointer<Byte>(element + 1)));
288			c.z = Float(Int(*Pointer<Byte>(element + 2)));
289			c.w = float(0xFF);
290			break;
291		case FORMAT_X8B8G8R8I:
292		case FORMAT_X8B8G8R8_SNORM:
293			c = Float4(*Pointer<SByte4>(element));
294			c.w = float(0x7F);
295			break;
296		case FORMAT_X8B8G8R8:
297		case FORMAT_X8B8G8R8UI:
298		case FORMAT_SRGB8_X8:
299			c = Float4(*Pointer<Byte4>(element));
300			c.w = float(0xFF);
301			break;
302		case FORMAT_A16B16G16R16I:
303			c = Float4(*Pointer<Short4>(element));
304			break;
305		case FORMAT_A16B16G16R16:
306		case FORMAT_A16B16G16R16UI:
307			c = Float4(*Pointer<UShort4>(element));
308			break;
309		case FORMAT_X16B16G16R16I:
310			c = Float4(*Pointer<Short4>(element));
311			c.w = float(0x7FFF);
312			break;
313		case FORMAT_X16B16G16R16UI:
314			c = Float4(*Pointer<UShort4>(element));
315			c.w = float(0xFFFF);
316			break;
317		case FORMAT_A32B32G32R32I:
318			c = Float4(*Pointer<Int4>(element));
319			break;
320		case FORMAT_A32B32G32R32UI:
321			c = Float4(*Pointer<UInt4>(element));
322			break;
323		case FORMAT_X32B32G32R32I:
324			c = Float4(*Pointer<Int4>(element));
325			c.w = float(0x7FFFFFFF);
326			break;
327		case FORMAT_X32B32G32R32UI:
328			c = Float4(*Pointer<UInt4>(element));
329			c.w = float(0xFFFFFFFF);
330			break;
331		case FORMAT_G8R8I:
332		case FORMAT_G8R8_SNORM:
333			c.x = Float(Int(*Pointer<SByte>(element + 0)));
334			c.y = Float(Int(*Pointer<SByte>(element + 1)));
335			c.w = float(0x7F);
336			break;
337		case FORMAT_G8R8:
338		case FORMAT_G8R8UI:
339			c.x = Float(Int(*Pointer<Byte>(element + 0)));
340			c.y = Float(Int(*Pointer<Byte>(element + 1)));
341			c.w = float(0xFF);
342			break;
343		case FORMAT_G16R16I:
344			c.x = Float(Int(*Pointer<Short>(element + 0)));
345			c.y = Float(Int(*Pointer<Short>(element + 2)));
346			c.w = float(0x7FFF);
347			break;
348		case FORMAT_G16R16:
349		case FORMAT_G16R16UI:
350			c.x = Float(Int(*Pointer<UShort>(element + 0)));
351			c.y = Float(Int(*Pointer<UShort>(element + 2)));
352			c.w = float(0xFFFF);
353			break;
354		case FORMAT_G32R32I:
355			c.x = Float(*Pointer<Int>(element + 0));
356			c.y = Float(*Pointer<Int>(element + 4));
357			c.w = float(0x7FFFFFFF);
358			break;
359		case FORMAT_G32R32UI:
360			c.x = Float(*Pointer<UInt>(element + 0));
361			c.y = Float(*Pointer<UInt>(element + 4));
362			c.w = float(0xFFFFFFFF);
363			break;
364		case FORMAT_A32B32G32R32F:
365			c = *Pointer<Float4>(element);
366			break;
367		case FORMAT_X32B32G32R32F:
368		case FORMAT_X32B32G32R32F_UNSIGNED:
369		case FORMAT_B32G32R32F:
370			c.z = *Pointer<Float>(element + 8);
			// Intentional fall through: x and y are read by the FORMAT_G32R32F case.
371		case FORMAT_G32R32F:
372			c.x = *Pointer<Float>(element + 0);
373			c.y = *Pointer<Float>(element + 4);
374			break;
375		case FORMAT_R32F:
376			c.x = *Pointer<Float>(element);
377			break;
378		case FORMAT_R5G6B5:
			// x = bits 15..11, y = bits 10..5, z = bits 4..0; w keeps the default 1.0f.
379			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
380			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
381			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
382			break;
383		case FORMAT_A2B10G10R10:
384		case FORMAT_A2B10G10R10UI:
			// x = bits 9..0, y = bits 19..10, z = bits 29..20, w = bits 31..30.
385			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
386			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
387			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
388			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
389			break;
390		case FORMAT_D16:
391			c.x = Float(Int((*Pointer<UShort>(element))));
392			break;
393		case FORMAT_D24S8:
			// NOTE(review): the full 32-bit word is converted, with no mask or shift
			// separating depth bits from stencil bits - confirm this is intended.
394			c.x = Float(Int((*Pointer<UInt>(element))));
395			break;
396		case FORMAT_D32:
			// NOTE(review): the unsigned word is converted through a signed Int -
			// confirm the behavior for values above 0x7FFFFFFF.
397			c.x = Float(Int((*Pointer<UInt>(element))));
398			break;
399		case FORMAT_D32F_COMPLEMENTARY:
400		case FORMAT_D32FS8_COMPLEMENTARY:
			// Stored value is the complement; recover the depth as 1 - stored.
401			c.x = 1.0f - *Pointer<Float>(element);
402			break;
403		case FORMAT_D32F:
404		case FORMAT_D32FS8:
405		case FORMAT_D32F_LOCKABLE:
406		case FORMAT_D32FS8_TEXTURE:
407		case FORMAT_D32F_SHADOW:
408		case FORMAT_D32FS8_SHADOW:
409			c.x = *Pointer<Float>(element);
410			break;
411		case FORMAT_S8:
412			c.x = Float(Int(*Pointer<Byte>(element)));
413			break;
414		default:
415			return false;
416		}
417
418		return true;
419	}
420
421	bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
422	{
423		bool writeR = state.writeRed;
424		bool writeG = state.writeGreen;
425		bool writeB = state.writeBlue;
426		bool writeA = state.writeAlpha;
427		bool writeRGBA = writeR && writeG && writeB && writeA;
428
429		switch(state.destFormat)
430		{
431		case FORMAT_L8:
432			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
433			break;
434		case FORMAT_A8:
435			if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
436			break;
437		case FORMAT_A8R8G8B8:
438			if(writeRGBA)
439			{
440				Short4 c0 = RoundShort4(c.zyxw);
441				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
442			}
443			else
444			{
445				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
446				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
447				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
448				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
449			}
450			break;
451		case FORMAT_A8B8G8R8:
452		case FORMAT_SRGB8_A8:
453			if(writeRGBA)
454			{
455				Short4 c0 = RoundShort4(c);
456				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
457			}
458			else
459			{
460				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
461				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
462				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
463				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
464			}
465			break;
466		case FORMAT_X8R8G8B8:
467			if(writeRGBA)
468			{
469				Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
470				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
471			}
472			else
473			{
474				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
475				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
476				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
477				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
478			}
479			break;
480		case FORMAT_X8B8G8R8:
481		case FORMAT_SRGB8_X8:
482			if(writeRGBA)
483			{
484				Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
485				*Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
486			}
487			else
488			{
489				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
490				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
491				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
492				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
493			}
494			break;
495		case FORMAT_R8G8B8:
496			if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
497			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
498			if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
499			break;
500		case FORMAT_B8G8R8:
501			if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
502			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
503			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
504			break;
505		case FORMAT_A32B32G32R32F:
506			if(writeRGBA)
507			{
508				*Pointer<Float4>(element) = c;
509			}
510			else
511			{
512				if(writeR) { *Pointer<Float>(element) = c.x; }
513				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
514				if(writeB) { *Pointer<Float>(element + 8) = c.z; }
515				if(writeA) { *Pointer<Float>(element + 12) = c.w; }
516			}
517			break;
518		case FORMAT_X32B32G32R32F:
519		case FORMAT_X32B32G32R32F_UNSIGNED:
520			if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
521		case FORMAT_B32G32R32F:
522			if(writeR) { *Pointer<Float>(element) = c.x; }
523			if(writeG) { *Pointer<Float>(element + 4) = c.y; }
524			if(writeB) { *Pointer<Float>(element + 8) = c.z; }
525			break;
526		case FORMAT_G32R32F:
527			if(writeR && writeG)
528			{
529				*Pointer<Float2>(element) = Float2(c);
530			}
531			else
532			{
533				if(writeR) { *Pointer<Float>(element) = c.x; }
534				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
535			}
536			break;
537		case FORMAT_R32F:
538			if(writeR) { *Pointer<Float>(element) = c.x; }
539			break;
540		case FORMAT_A8B8G8R8I:
541		case FORMAT_A8B8G8R8_SNORM:
542			if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
543		case FORMAT_X8B8G8R8I:
544		case FORMAT_X8B8G8R8_SNORM:
545			if(writeA && (state.destFormat == FORMAT_X8B8G8R8I || state.destFormat == FORMAT_X8B8G8R8_SNORM))
546			{
547				*Pointer<SByte>(element + 3) = SByte(0x7F);
548			}
549			if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
550		case FORMAT_G8R8I:
551		case FORMAT_G8R8_SNORM:
552			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
553		case FORMAT_R8I:
554		case FORMAT_R8_SNORM:
555			if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
556			break;
557		case FORMAT_A8B8G8R8UI:
558			if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
559		case FORMAT_X8B8G8R8UI:
560			if(writeA && (state.destFormat == FORMAT_X8B8G8R8UI))
561			{
562				*Pointer<Byte>(element + 3) = Byte(0xFF);
563			}
564			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
565		case FORMAT_G8R8UI:
566		case FORMAT_G8R8:
567			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
568		case FORMAT_R8UI:
569		case FORMAT_R8:
570			if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
571			break;
572		case FORMAT_A16B16G16R16I:
573			if(writeRGBA)
574			{
575				*Pointer<Short4>(element) = Short4(RoundInt(c));
576			}
577			else
578			{
579				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
580				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
581				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
582				if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
583			}
584			break;
585		case FORMAT_X16B16G16R16I:
586			if(writeRGBA)
587			{
588				*Pointer<Short4>(element) = Short4(RoundInt(c));
589			}
590			else
591			{
592				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
593				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
594				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
595			}
596			if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
597			break;
598		case FORMAT_G16R16I:
599			if(writeR && writeG)
600			{
601				*Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
602			}
603			else
604			{
605				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
606				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
607			}
608			break;
609		case FORMAT_R16I:
610			if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
611			break;
612		case FORMAT_A16B16G16R16UI:
613		case FORMAT_A16B16G16R16:
614			if(writeRGBA)
615			{
616				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
617			}
618			else
619			{
620				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
621				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
622				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
623				if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
624			}
625			break;
626		case FORMAT_X16B16G16R16UI:
627			if(writeRGBA)
628			{
629				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
630			}
631			else
632			{
633				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
634				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
635				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
636			}
637			if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
638			break;
639		case FORMAT_G16R16UI:
640		case FORMAT_G16R16:
641			if(writeR && writeG)
642			{
643				*Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
644			}
645			else
646			{
647				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
648				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
649			}
650			break;
651		case FORMAT_R16UI:
652			if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
653			break;
654		case FORMAT_A32B32G32R32I:
655			if(writeRGBA)
656			{
657				*Pointer<Int4>(element) = RoundInt(c);
658			}
659			else
660			{
661				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
662				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
663				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
664				if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
665			}
666			break;
667		case FORMAT_X32B32G32R32I:
668			if(writeRGBA)
669			{
670				*Pointer<Int4>(element) = RoundInt(c);
671			}
672			else
673			{
674				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
675				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
676				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
677			}
678			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
679			break;
680		case FORMAT_G32R32I:
681			if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
682		case FORMAT_R32I:
683			if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
684			break;
685		case FORMAT_A32B32G32R32UI:
686			if(writeRGBA)
687			{
688				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
689			}
690			else
691			{
692				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
693				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
694				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
695				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
696			}
697			break;
698		case FORMAT_X32B32G32R32UI:
699			if(writeRGBA)
700			{
701				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
702			}
703			else
704			{
705				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
706				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
707				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
708			}
709			if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
710			break;
711		case FORMAT_G32R32UI:
712			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
713		case FORMAT_R32UI:
714			if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
715			break;
716		case FORMAT_R5G6B5:
717			if(writeR && writeG && writeB)
718			{
719				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
720				                                  (RoundInt(Float(c.y)) << Int(5)) |
721				                                  (RoundInt(Float(c.x)) << Int(11)));
722			}
723			else
724			{
725				unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
726				unsigned short unmask = ~mask;
727				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
728				                            (UShort(RoundInt(Float(c.z)) |
729				                                   (RoundInt(Float(c.y)) << Int(5)) |
730				                                   (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
731			}
732			break;
733		case FORMAT_A2B10G10R10:
734		case FORMAT_A2B10G10R10UI:
735			if(writeRGBA)
736			{
737				*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
738				                              (RoundInt(Float(c.y)) << 10) |
739				                              (RoundInt(Float(c.z)) << 20) |
740				                              (RoundInt(Float(c.w)) << 30));
741			}
742			else
743			{
744				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
745				                    (writeB ? 0x3FF00000 : 0x0000) |
746				                    (writeG ? 0x000FFC00 : 0x0000) |
747				                    (writeR ? 0x000003FF : 0x0000);
748				unsigned int unmask = ~mask;
749				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
750				                            (UInt(RoundInt(Float(c.x)) |
751				                                  (RoundInt(Float(c.y)) << 10) |
752				                                  (RoundInt(Float(c.z)) << 20) |
753				                                  (RoundInt(Float(c.w)) << 30)) & UInt(mask));
754			}
755			break;
756		case FORMAT_D16:
757			*Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
758			break;
759		case FORMAT_D24S8:
760			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
761			break;
762		case FORMAT_D32:
763			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
764			break;
765		case FORMAT_D32F_COMPLEMENTARY:
766		case FORMAT_D32FS8_COMPLEMENTARY:
767			*Pointer<Float>(element) = 1.0f - c.x;
768			break;
769		case FORMAT_D32F:
770		case FORMAT_D32FS8:
771		case FORMAT_D32F_LOCKABLE:
772		case FORMAT_D32FS8_TEXTURE:
773		case FORMAT_D32F_SHADOW:
774		case FORMAT_D32FS8_SHADOW:
775			*Pointer<Float>(element) = c.x;
776			break;
777		case FORMAT_S8:
778			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
779			break;
780		default:
781			return false;
782		}
783		return true;
784	}
785
	// Integer overload of read(): loads one element of an integer source format
	// from 'element' into the lanes of 'c' without any float conversion.
	//
	// 'c' starts out as (0, 0, 0, 1). Within each component width the cases are
	// ordered from most to fewest channels and deliberately fall through, so every
	// format fills exactly the lanes it stores: lane 3 only for the A formats,
	// lane 2 for A and X formats, lane 1 down to the two-channel formats, lane 0
	// for all of them. Lanes that are not stored keep their defaults, so the X
	// formats report 1 in lane 3.
	//
	// Returns false when state.sourceFormat has no case in the switch, true otherwise.
786	bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state)
787	{
788		c = Int4(0, 0, 0, 1);
789
790		switch(state.sourceFormat)
791		{
		// Signed 8-bit components (loaded through SByte).
792		case FORMAT_A8B8G8R8I:
793			c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
			// Intentional fall through.
794		case FORMAT_X8B8G8R8I:
795			c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
			// Intentional fall through.
796		case FORMAT_G8R8I:
797			c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
			// Intentional fall through.
798		case FORMAT_R8I:
799			c = Insert(c, Int(*Pointer<SByte>(element)), 0);
800			break;
		// Unsigned 8-bit components (loaded through Byte).
801		case FORMAT_A8B8G8R8UI:
802			c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
			// Intentional fall through.
803		case FORMAT_X8B8G8R8UI:
804			c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
			// Intentional fall through.
805		case FORMAT_G8R8UI:
806			c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
			// Intentional fall through.
807		case FORMAT_R8UI:
808			c = Insert(c, Int(*Pointer<Byte>(element)), 0);
809			break;
		// Signed 16-bit components, two bytes apart.
810		case FORMAT_A16B16G16R16I:
811			c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
			// Intentional fall through.
812		case FORMAT_X16B16G16R16I:
813			c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
			// Intentional fall through.
814		case FORMAT_G16R16I:
815			c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
			// Intentional fall through.
816		case FORMAT_R16I:
817			c = Insert(c, Int(*Pointer<Short>(element)), 0);
818			break;
		// Unsigned 16-bit components, two bytes apart.
819		case FORMAT_A16B16G16R16UI:
820			c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
			// Intentional fall through.
821		case FORMAT_X16B16G16R16UI:
822			c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
			// Intentional fall through.
823		case FORMAT_G16R16UI:
824			c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
			// Intentional fall through.
825		case FORMAT_R16UI:
826			c = Insert(c, Int(*Pointer<UShort>(element)), 0);
827			break;
		// 32-bit components: signed and unsigned formats share the same loads,
		// all performed through Int/Int4 pointers.
828		case FORMAT_A32B32G32R32I:
829		case FORMAT_A32B32G32R32UI:
830			c = *Pointer<Int4>(element);
831			break;
832		case FORMAT_X32B32G32R32I:
833		case FORMAT_X32B32G32R32UI:
834			c = Insert(c, *Pointer<Int>(element + 8), 2);
			// Intentional fall through.
835		case FORMAT_G32R32I:
836		case FORMAT_G32R32UI:
837			c = Insert(c, *Pointer<Int>(element + 4), 1);
			// Intentional fall through.
838		case FORMAT_R32I:
839		case FORMAT_R32UI:
840			c = Insert(c, *Pointer<Int>(element), 0);
841			break;
842		default:
843			return false;
844		}
845
846		return true;
847	}
848
849	bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
850	{
851		bool writeR = state.writeRed;
852		bool writeG = state.writeGreen;
853		bool writeB = state.writeBlue;
854		bool writeA = state.writeAlpha;
855		bool writeRGBA = writeR && writeG && writeB && writeA;
856
857		switch(state.destFormat)
858		{
859		case FORMAT_A8B8G8R8I:
860			if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
861		case FORMAT_X8B8G8R8I:
862			if(writeA && (state.destFormat != FORMAT_A8B8G8R8I))
863			{
864				*Pointer<SByte>(element + 3) = SByte(0x7F);
865			}
866			if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
867		case FORMAT_G8R8I:
868			if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
869		case FORMAT_R8I:
870			if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
871			break;
872		case FORMAT_A8B8G8R8UI:
873			if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
874		case FORMAT_X8B8G8R8UI:
875			if(writeA && (state.destFormat != FORMAT_A8B8G8R8UI))
876			{
877				*Pointer<Byte>(element + 3) = Byte(0xFF);
878			}
879			if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
880		case FORMAT_G8R8UI:
881			if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
882		case FORMAT_R8UI:
883			if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
884			break;
885		case FORMAT_A16B16G16R16I:
886			if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
887		case FORMAT_X16B16G16R16I:
888			if(writeA && (state.destFormat != FORMAT_A16B16G16R16I))
889			{
890				*Pointer<Short>(element + 6) = Short(0x7FFF);
891			}
892			if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
893		case FORMAT_G16R16I:
894			if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
895		case FORMAT_R16I:
896			if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
897			break;
898		case FORMAT_A16B16G16R16UI:
899			if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
900		case FORMAT_X16B16G16R16UI:
901			if(writeA && (state.destFormat != FORMAT_A16B16G16R16UI))
902			{
903				*Pointer<UShort>(element + 6) = UShort(0xFFFF);
904			}
905			if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
906		case FORMAT_G16R16UI:
907			if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
908		case FORMAT_R16UI:
909			if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
910			break;
911		case FORMAT_A32B32G32R32I:
912			if(writeRGBA)
913			{
914				*Pointer<Int4>(element) = c;
915			}
916			else
917			{
918				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
919				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
920				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
921				if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
922			}
923			break;
924		case FORMAT_X32B32G32R32I:
925			if(writeRGBA)
926			{
927				*Pointer<Int4>(element) = c;
928			}
929			else
930			{
931				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
932				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
933				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
934			}
935			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
936			break;
937		case FORMAT_G32R32I:
938			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
939			if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
940			break;
941		case FORMAT_R32I:
942			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
943			break;
944		case FORMAT_A32B32G32R32UI:
945			if(writeRGBA)
946			{
947				*Pointer<UInt4>(element) = As<UInt4>(c);
948			}
949			else
950			{
951				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
952				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
953				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
954				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
955			}
956			break;
957		case FORMAT_X32B32G32R32UI:
958			if(writeRGBA)
959			{
960				*Pointer<UInt4>(element) = As<UInt4>(c);
961			}
962			else
963			{
964				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
965				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
966				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
967			}
968			if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
969			break;
970		case FORMAT_G32R32UI:
971			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
972			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
973			break;
974		case FORMAT_R32UI:
975			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
976			break;
977		default:
978			return false;
979		}
980
981		return true;
982	}
983
984	bool Blitter::GetScale(float4 &scale, Format format)
985	{
986		switch(format)
987		{
988		case FORMAT_L8:
989		case FORMAT_A8:
990		case FORMAT_A8R8G8B8:
991		case FORMAT_X8R8G8B8:
992		case FORMAT_R8:
993		case FORMAT_G8R8:
994		case FORMAT_R8G8B8:
995		case FORMAT_B8G8R8:
996		case FORMAT_X8B8G8R8:
997		case FORMAT_A8B8G8R8:
998		case FORMAT_SRGB8_X8:
999		case FORMAT_SRGB8_A8:
1000			scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
1001			break;
1002		case FORMAT_R8_SNORM:
1003		case FORMAT_G8R8_SNORM:
1004		case FORMAT_X8B8G8R8_SNORM:
1005		case FORMAT_A8B8G8R8_SNORM:
1006			scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
1007			break;
1008		case FORMAT_A16B16G16R16:
1009			scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
1010			break;
1011		case FORMAT_R8I:
1012		case FORMAT_R8UI:
1013		case FORMAT_G8R8I:
1014		case FORMAT_G8R8UI:
1015		case FORMAT_X8B8G8R8I:
1016		case FORMAT_X8B8G8R8UI:
1017		case FORMAT_A8B8G8R8I:
1018		case FORMAT_A8B8G8R8UI:
1019		case FORMAT_R16I:
1020		case FORMAT_R16UI:
1021		case FORMAT_G16R16:
1022		case FORMAT_G16R16I:
1023		case FORMAT_G16R16UI:
1024		case FORMAT_X16B16G16R16I:
1025		case FORMAT_X16B16G16R16UI:
1026		case FORMAT_A16B16G16R16I:
1027		case FORMAT_A16B16G16R16UI:
1028		case FORMAT_R32I:
1029		case FORMAT_R32UI:
1030		case FORMAT_G32R32I:
1031		case FORMAT_G32R32UI:
1032		case FORMAT_X32B32G32R32I:
1033		case FORMAT_X32B32G32R32UI:
1034		case FORMAT_A32B32G32R32I:
1035		case FORMAT_A32B32G32R32UI:
1036		case FORMAT_A32B32G32R32F:
1037		case FORMAT_X32B32G32R32F:
1038		case FORMAT_X32B32G32R32F_UNSIGNED:
1039		case FORMAT_B32G32R32F:
1040		case FORMAT_G32R32F:
1041		case FORMAT_R32F:
1042		case FORMAT_A2B10G10R10UI:
1043			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1044			break;
1045		case FORMAT_R5G6B5:
1046			scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
1047			break;
1048		case FORMAT_A2B10G10R10:
1049			scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
1050			break;
1051		case FORMAT_D16:
1052			scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
1053			break;
1054		case FORMAT_D24S8:
1055			scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
1056			break;
1057		case FORMAT_D32:
1058			scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f);
1059			break;
1060		case FORMAT_D32F:
1061		case FORMAT_D32FS8:
1062		case FORMAT_D32F_COMPLEMENTARY:
1063		case FORMAT_D32FS8_COMPLEMENTARY:
1064		case FORMAT_D32F_LOCKABLE:
1065		case FORMAT_D32FS8_TEXTURE:
1066		case FORMAT_D32F_SHADOW:
1067		case FORMAT_D32FS8_SHADOW:
1068		case FORMAT_S8:
1069			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1070			break;
1071		default:
1072			return false;
1073		}
1074
1075		return true;
1076	}
1077
1078	bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1079	{
1080		float4 scale, unscale;
1081		if(state.clearOperation &&
1082		   Surface::isNonNormalizedInteger(state.sourceFormat) &&
1083		   !Surface::isNonNormalizedInteger(state.destFormat))
1084		{
1085			// If we're clearing a buffer from an int or uint color into a normalized color,
1086			// then the whole range of the int or uint color must be scaled between 0 and 1.
1087			switch(state.sourceFormat)
1088			{
1089			case FORMAT_A32B32G32R32I:
1090				unscale = replicate(static_cast<float>(0x7FFFFFFF));
1091				break;
1092			case FORMAT_A32B32G32R32UI:
1093				unscale = replicate(static_cast<float>(0xFFFFFFFF));
1094				break;
1095			default:
1096				return false;
1097			}
1098		}
1099		else if(!GetScale(unscale, state.sourceFormat))
1100		{
1101			return false;
1102		}
1103
1104		if(!GetScale(scale, state.destFormat))
1105		{
1106			return false;
1107		}
1108
1109		bool srcSRGB = Surface::isSRGBformat(state.sourceFormat);
1110		bool dstSRGB = Surface::isSRGBformat(state.destFormat);
1111
1112		if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB))   // One of the formats is sRGB encoded.
1113		{
1114			value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1115			                     Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
1116			value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
1117			value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
1118		}
1119		else if(unscale != scale)
1120		{
1121			value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1122		}
1123
1124		if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED)
1125		{
1126			value = Max(value, Float4(0.0f));  // TODO: Only necessary if source is signed.
1127		}
1128		else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1129		{
1130			value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1131
1132			value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1133			                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1134			                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1135			                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1136		}
1137
1138		return true;
1139	}
1140
1141	Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
1142	{
1143		if(!quadLayout)
1144		{
1145			return y * pitchB + x * bytes;
1146		}
1147		else
1148		{
1149			// (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
1150			return (y & Int(~1)) * pitchB +
1151			       ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes;
1152		}
1153	}
1154
1155	Float4 Blitter::LinearToSRGB(Float4 &c)
1156	{
1157		Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1158		Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1159
1160		Float4 s = c;
1161		s.xyz = Max(lc, ec);
1162
1163		return s;
1164	}
1165
1166	Float4 Blitter::sRGBtoLinear(Float4 &c)
1167	{
1168		Float4 lc = c * Float4(1.0f / 12.92f);
1169		Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1170
1171		Int4 linear = CmpLT(c, Float4(0.04045f));
1172
1173		Float4 s = c;
1174		s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec)));   // FIXME: IfThenElse()
1175
1176		return s;
1177	}
1178
	// Builds the Reactor routine that executes a blit or clear for the given state.
	// The generated function takes a pointer to a BlitData structure. Plain C++
	// conditions on 'state' (and the flags derived from it) are evaluated here, while
	// generating, so the emitted routine only contains the code path for this state.
	// Returns nullptr when read(), write() or ApplyScaleAndClamp() fails for the
	// formats in 'state'.
	Routine *Blitter::generate(const State &state)
	{
		Function<Void(Pointer<Byte>)> function;
		{
			// The routine's single argument: the BlitData passed by the caller.
			Pointer<Byte> blit(function.Arg<0>());

			// Base addresses and row pitches (in bytes) of the source and destination.
			Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
			Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
			Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
			Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));

			// Source coordinate sampled for the first destination pixel (x0, y0), and the
			// source-space step per destination pixel (w, h).
			Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
			Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
			Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
			Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));

			// Destination rectangle; the loops below cover [x0d, x1d) x [y0d, y1d).
			Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
			Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
			Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
			Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));

			// Source dimensions, used for clamping source coordinates.
			Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
			Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));

			// Generation-time properties of the two formats.
			bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
			bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
			bool intBoth = intSrc && intDst;
			bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat);
			bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat);
			int srcBytes = Surface::bytes(state.sourceFormat);
			int dstBytes = Surface::bytes(state.destFormat);

			// For a clear, the color is read (and converted) once, outside the loops, and
			// then written to every destination pixel.
			bool hasConstantColorI = false;
			Int4 constantColorI;
			bool hasConstantColorF = false;
			Float4 constantColorF;
			if(state.clearOperation)
			{
				if(intBoth) // Integer types
				{
					if(!read(constantColorI, source, state))
					{
						return nullptr;
					}
					hasConstantColorI = true;
				}
				else
				{
					if(!read(constantColorF, source, state))
					{
						return nullptr;
					}
					hasConstantColorF = true;

					if(!ApplyScaleAndClamp(constantColorF, state))
					{
						return nullptr;
					}
				}
			}

			// Current source row coordinate; advanced by h after each destination row.
			Float y = y0;

			For(Int j = y0d, j < y1d, j++)
			{
				// Current source column coordinate; advanced by w after each destination pixel.
				Float x = x0;
				// With a quad layout rows are stored in pairs, so the line starts at the even row.
				Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;

				For(Int i = x0d, i < x1d, i++)
				{
					// Destination pixel address; the quad-layout term mirrors ComputeOffset().
					Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;

					if(hasConstantColorI)
					{
						if(!write(constantColorI, d, state))
						{
							return nullptr;
						}
					}
					else if(hasConstantColorF)
					{
						// Plain C++ loop: one write is emitted per destination sample, each
						// dSliceB bytes after the previous one.
						for(int s = 0; s < state.destSamples; s++)
						{
							if(!write(constantColorF, d, state))
							{
								return nullptr;
							}

							d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
						}
					}
					else if(intBoth) // Integer types do not support filtering
					{
						Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
						Int X = Int(x);
						Int Y = Int(y);

						if(state.clampToEdge)
						{
							X = Clamp(X, 0, sWidth - 1);
							Y = Clamp(Y, 0, sHeight - 1);
						}

						Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);

						if(!read(color, s, state))
						{
							return nullptr;
						}

						if(!write(color, d, state))
						{
							return nullptr;
						}
					}
					else
					{
						Float4 color;

						// Set when the texels were already converted before filtering, so the
						// final ApplyScaleAndClamp() call must not decode/scale them again.
						bool preScaled = false;
						if(!state.filter || intSrc)
						{
							// Point sampling: read the single source pixel containing (x, y).
							Int X = Int(x);
							Int Y = Int(y);

							if(state.clampToEdge)
							{
								X = Clamp(X, 0, sWidth - 1);
								Y = Clamp(Y, 0, sHeight - 1);
							}

							Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);

							if(!read(color, s, state))
							{
								return nullptr;
							}
						}
						else   // Bilinear filtering
						{
							Float X = x;
							Float Y = y;

							if(state.clampToEdge)
							{
								// Keep the sample position at least half a pixel inside the source.
								X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
								Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
							}

							// Position relative to pixel centers. Note that these locals shadow
							// the outer x0/y0 loaded from BlitData.
							Float x0 = X - 0.5f;
							Float y0 = Y - 0.5f;

							// Top-left texel of the 2x2 footprint, never below 0.
							Int X0 = Max(Int(x0), 0);
							Int Y0 = Max(Int(y0), 0);

							// Bottom-right texel; reuses X0/Y0 when it would fall outside the source.
							Int X1 = X0 + 1;
							Int Y1 = Y0 + 1;
							X1 = IfThenElse(X1 >= sWidth, X0, X1);
							Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);

							Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
							Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
							Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
							Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);

							Float4 c00; if(!read(c00, s00, state)) return nullptr;
							Float4 c01; if(!read(c01, s01, state)) return nullptr;
							Float4 c10; if(!read(c10, s10, state)) return nullptr;
							Float4 c11; if(!read(c11, s11, state)) return nullptr;

							// Convert each texel before interpolating, so that the filtering
							// operates on linear rather than sRGB-encoded values.
							if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB
							{
								if(!ApplyScaleAndClamp(c00, state)) return nullptr;
								if(!ApplyScaleAndClamp(c01, state)) return nullptr;
								if(!ApplyScaleAndClamp(c10, state)) return nullptr;
								if(!ApplyScaleAndClamp(c11, state)) return nullptr;
								preScaled = true;
							}

							// Interpolation weights: fractional distance from texel (X0, Y0).
							Float4 fx = Float4(x0 - Float(X0));
							Float4 fy = Float4(y0 - Float(Y0));
							Float4 ix = Float4(1.0f) - fx;
							Float4 iy = Float4(1.0f) - fy;

							color = (c00 * ix + c01 * fx) * iy +
							        (c10 * ix + c11 * fx) * fy;
						}

						if(!ApplyScaleAndClamp(color, state, preScaled))
						{
							return nullptr;
						}

						// Plain C++ loop: one write is emitted per destination sample, each
						// dSliceB bytes after the previous one.
						for(int s = 0; s < state.destSamples; s++)
						{
							if(!write(color, d, state))
							{
								return nullptr;
							}

							d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
						}
					}

					// Source coordinates are only stepped for blits; clears use the constant color.
					if(!state.clearOperation) { x += w; }
				}

				if(!state.clearOperation) { y += h; }
			}
		}

		return function(L"BlitRoutine");
	}
1392
	// Blits (or clears) 'sourceRect' of 'source' into 'destRect' of 'dest' using a
	// generated routine, which is looked up in blitCache or generated on demand.
	//
	// source, sourceRect - surface and region to read from. For a clear operation the
	//                      source must be a 1x1x1 surface holding the clear color.
	// dest, destRect     - surface and region to write to.
	// options            - blit options (write mask, stencil, clear, filtering, ...).
	//
	// Returns false when no routine could be generated for the resulting state, true
	// once the blit has been executed.
	bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options)
	{
		ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));

		// Make the destination rectangle run in increasing x and y. The source rectangle
		// is swapped along with it, so a flipped blit is preserved as a reversed source.
		Rect dRect = destRect;
		RectF sRect = sourceRect;
		if(destRect.x0 > destRect.x1)
		{
			swap(dRect.x0, dRect.x1);
			swap(sRect.x0, sRect.x1);
		}
		if(destRect.y0 > destRect.y1)
		{
			swap(dRect.y0, dRect.y1);
			swap(sRect.y0, sRect.y1);
		}

		State state(options);
		// Source coordinates only need clamping when the source rectangle extends
		// outside of the source surface.
		state.clampToEdge = (sourceRect.x0 < 0.0f) ||
		                    (sourceRect.y0 < 0.0f) ||
		                    (sourceRect.x1 > (float)source->getWidth()) ||
		                    (sourceRect.y1 > (float)source->getHeight());

		// Use a surface's internal buffer unless its external buffer is marked dirty.
		bool useSourceInternal = !source->isExternalDirty();
		bool useDestInternal = !dest->isExternalDirty();
		bool isStencil = options.useStencil;

		state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal);
		state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal);
		state.destSamples = dest->getSamples();

		// The cache lookup, generation and insertion are done under criticalSection.
		criticalSection.lock();
		Routine *blitRoutine = blitCache->query(state);

		if(!blitRoutine)
		{
			blitRoutine = generate(state);

			if(!blitRoutine)
			{
				// Release the lock before bailing out on an unsupported state.
				criticalSection.unlock();
				return false;
			}

			blitCache->add(state, blitRoutine);
		}

		criticalSection.unlock();

		void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();

		BlitData data;

		// Destination lock mode: discard when all four channels of the whole surface are
		// overwritten, write-only when all four channels of a sub-region are overwritten,
		// and read-write when some channels are masked out.
		bool isRGBA = options.writeMask == 0xF;
		bool isEntireDest = dest->isEntire(destRect);

		data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) :
		                          source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
		data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) :
		                        dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
		data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal);
		data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal);
		data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal);

		// Source-space step per destination pixel, and the source coordinate for the
		// first destination pixel, offset by half a step so that sampling happens at
		// destination pixel centers.
		data.w = sRect.width() / dRect.width();
		data.h = sRect.height() / dRect.height();
		data.x0 = sRect.x0 + 0.5f * data.w;
		data.y0 = sRect.y0 + 0.5f * data.h;

		data.x0d = dRect.x0;
		data.x1d = dRect.x1;
		data.y0d = dRect.y0;
		data.y1d = dRect.y1;

		data.sWidth = source->getWidth();
		data.sHeight = source->getHeight();

		blitFunction(&data);

		// Release the surface locks taken above, matching the lock variant that was used.
		if(isStencil)
		{
			source->unlockStencil();
			dest->unlockStencil();
		}
		else
		{
			source->unlock(useSourceInternal);
			dest->unlock(useDestInternal);
		}

		return true;
	}
1485}
1486