1/**
2 **
3 ** Copyright 2011, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18#include "src/pixelflinger2/pixelflinger2.h"
19#include "src/pixelflinger2/llvm_helper.h"
20#include "src/mesa/main/mtypes.h"
21
22#include <llvm/Module.h>
23
24//#undef ALOGD
25//#define ALOGD(...)
26
27using namespace llvm;
28
29static void StencilOp(IRBuilder<> &builder, const unsigned char op,
30                      Value * sPtr, Value * sRef)
31{
32   CondBranch condBranch(builder);
33   Value * s = builder.CreateLoad(sPtr, "stenciOpS");
34   switch (op) {
35   case 0 : // GL_ZERO
36      builder.CreateStore(builder.getInt8(0), sPtr);
37      break;
38   case 1 : // GL_KEEP
39      builder.CreateStore(s, sPtr);
40      break;
41   case 2 : // GL_REPLACE
42      builder.CreateStore(sRef, sPtr);
43      break;
44   case 3 : // GL_INCR
45      condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(255)));
46      builder.CreateStore(s, sPtr);
47      condBranch.elseop();
48      builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
49      condBranch.endif();
50      break;
51   case 4 : // GL_DECR
52      condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(0)));
53      builder.CreateStore(s, sPtr);
54      condBranch.elseop();
55      builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
56      condBranch.endif();
57      break;
58   case 5 : // GL_INVERT
59      builder.CreateStore(builder.CreateNot(s), sPtr);
60      break;
61   case 6 : // GL_INCR_WRAP
62      builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr);
63      break;
64   case 7 : // GL_DECR_WRAP
65      builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr);
66      break;
67   default:
68      assert(0);
69      break;
70   }
71}
72
73static Value * StencilOp(IRBuilder<> & builder, Value * face,
74                         const unsigned char frontOp, const unsigned char backOp,
75                         Value * sPtr, Value * sRef)
76{
77   CondBranch condBranch(builder);
78   if (frontOp != backOp)
79      condBranch.ifCond(builder.CreateICmpEQ(face, builder.getInt8(0)));
80
81   StencilOp(builder, frontOp, sPtr, sRef);
82
83   if (frontOp != backOp) {
84      condBranch.elseop();
85      StencilOp(builder, backOp, sPtr, sRef);
86      condBranch.endif();
87   }
88   return builder.CreateLoad(sPtr);
89}
90
91static void StencilFunc(IRBuilder<> & builder, const unsigned char func,
92                        Value * s, Value * sRef, Value * sCmpPtr)
93{
94   switch (func) {
95   case GL_NEVER & 0x7:
96      builder.CreateStore(builder.getFalse(), sCmpPtr);
97      break;
98   case GL_LESS & 0x7:
99      builder.CreateStore(builder.CreateICmpULT(sRef, s), sCmpPtr);
100      break;
101   case GL_EQUAL & 0x7:
102      builder.CreateStore(builder.CreateICmpEQ(sRef, s), sCmpPtr);
103      break;
104   case GL_LEQUAL & 0x7:
105      builder.CreateStore(builder.CreateICmpULE(sRef, s), sCmpPtr);
106      break;
107   case GL_GREATER & 0x7:
108      builder.CreateStore(builder.CreateICmpUGT(sRef, s), sCmpPtr);
109      break;
110   case GL_NOTEQUAL & 0x7:
111      builder.CreateStore(builder.CreateICmpNE(sRef, s), sCmpPtr);
112      break;
113   case GL_GEQUAL & 0x7:
114      builder.CreateStore(builder.CreateICmpUGE(sRef, s), sCmpPtr);
115      break;
116   case GL_ALWAYS & 0x7:
117      builder.CreateStore(builder.getTrue(), sCmpPtr);
118      break;
119   default:
120      assert(0);
121      break;
122   }
123}
124
125static Value * BlendFactor(const unsigned mode, Value * src, Value * dst,
126                           Value * constant, Value * one, Value * zero,
127                           Value * srcA, Value * dstA, Value * constantA,
128                           Value * sOne, const bool isVector, IRBuilder<> & builder)
129{
130   Value * factor = NULL;
131   switch (mode) {
132   case GGLBlendState::GGL_ZERO:
133      factor = zero;
134      break;
135   case GGLBlendState::GGL_ONE:
136      factor = one;
137      break;
138   case GGLBlendState::GGL_SRC_COLOR:
139      factor = src;
140      break;
141   case GGLBlendState::GGL_ONE_MINUS_SRC_COLOR:
142      factor = builder.CreateSub(one, src);
143      break;
144   case GGLBlendState::GGL_DST_COLOR:
145      factor = dst;
146      break;
147   case GGLBlendState::GGL_ONE_MINUS_DST_COLOR:
148      factor = builder.CreateSub(one, dst);
149      break;
150   case GGLBlendState::GGL_SRC_ALPHA:
151      factor = srcA;
152      if (isVector)
153         factor = intVec(builder, factor, factor, factor, factor);
154      break;
155   case GGLBlendState::GGL_ONE_MINUS_SRC_ALPHA:
156      factor = builder.CreateSub(sOne, srcA);
157      if (isVector)
158         factor = intVec(builder, factor, factor, factor, factor);
159      break;
160   case GGLBlendState::GGL_DST_ALPHA:
161      factor = dstA;
162      if (isVector)
163         factor = intVec(builder, factor, factor, factor, factor);
164      break;
165   case GGLBlendState::GGL_ONE_MINUS_DST_ALPHA:
166      factor = builder.CreateSub(sOne, dstA);
167      if (isVector)
168         factor = intVec(builder, factor, factor, factor, factor);
169      break;
170   case GGLBlendState::GGL_SRC_ALPHA_SATURATE:
171      // valid only for source color and alpha
172      factor = minIntScalar(builder, srcA, builder.CreateSub(sOne, dstA));
173      if (isVector)
174         factor = intVec(builder, factor, factor, factor, sOne);
175      else
176         factor = sOne; // when it's used for source alpha, it's just 1
177      break;
178   case GGLBlendState::GGL_CONSTANT_COLOR:
179      factor = constant;
180      break;
181   case GGLBlendState::GGL_ONE_MINUS_CONSTANT_COLOR:
182      factor = builder.CreateSub(one, constant);
183      break;
184   case GGLBlendState::GGL_CONSTANT_ALPHA:
185      factor = constantA;
186      if (isVector)
187         factor = intVec(builder, factor, factor, factor, factor);
188      break;
189   case GGLBlendState::GGL_ONE_MINUS_CONSTANT_ALPHA:
190      factor = builder.CreateSub(sOne, constantA);
191      if (isVector)
192         factor = intVec(builder, factor, factor, factor, factor);
193      break;
194   default:
195      assert(0);
196      break;
197   }
198   return factor;
199}
200
201static Value * Saturate(IRBuilder<> & builder, Value * intVector)
202{
203   intVector = intVecMax(builder, intVector, constIntVec(builder, 0,0,0,0));
204   return intVecMin(builder, intVector, constIntVec(builder, 255,255,255,255));
205}
206
207// src is int32x4 [0,255] rgba vector, and combines them into int32
208// RGB_565 channel order is weird
209static Value * IntVectorToScreenColor(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
210{
211   if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
212      src = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24));
213      std::vector<Value *> comps = extractVector(builder, src);
214      comps[0] = builder.CreateOr(comps[0], comps[1]);
215      comps[0] = builder.CreateOr(comps[0], comps[2]);
216      comps[0] = builder.CreateOr(comps[0], comps[3]);
217      return comps[0];
218   } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
219      src = builder.CreateAnd(src, constIntVec(builder, 0xf8, 0xfc, 0xf8, 0));
220      std::vector<Value *> comps = extractVector(builder, src);
221      // channel order is weird
222      for (unsigned i = 0; i < 4; i++)
223         comps[i] = builder.CreateTrunc(comps[i], builder.getInt16Ty());
224      comps[2] = builder.CreateLShr(comps[2], 3);
225      comps[1] = builder.CreateShl(comps[1], 3);
226      comps[0] = builder.CreateShl(comps[0], 8);
227
228      comps[0] = builder.CreateOr(comps[0], comps[1]);
229      comps[0] = builder.CreateOr(comps[0], comps[2]);
230      return comps[0];
231   } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
232      return builder.getInt32(0);
233   else
234      assert(0);
235   return NULL;
236}
237
238// src is int32 or int16, return is int32x4 [0,255] rgba
239// RGB_565 channel order is weird
240static Value * ScreenColorToIntVector(IRBuilder<> & builder, const GGLPixelFormat format, Value * src)
241{
242   src = builder.CreateZExt(src, builder.getInt32Ty());
243   Value * dst = intVec(builder, src, src, src, src);
244   if (GGL_PIXEL_FORMAT_RGBA_8888 == format) {
245      dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24));
246      dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
247   } else if (GGL_PIXEL_FORMAT_RGB_565 == format) {
248      // channel order is weird
249      dst = builder.CreateAnd(dst, constIntVec(builder, 0xf800, 0x7e0, 0x1f, 0));
250      dst = builder.CreateLShr(dst, constIntVec(builder, 8, 3, 0, 0));
251      dst = builder.CreateShl(dst, constIntVec(builder, 0, 0, 3, 0));
252      dst = builder.CreateOr(dst, constIntVec(builder, 0, 0, 0, 0xff));
253   } else if (GGL_PIXEL_FORMAT_UNKNOWN == format)
254      ALOGD("pf2: ScreenColorToIntVector GGL_PIXEL_FORMAT_UNKNOWN"); // not set yet, do nothing
255   else
256      assert(0);
257   return dst;
258}
259
260// src is <4 x float> approx [0,1]; dst is <4 x i32> [0,255] from frame buffer; return is i32
261Value * GenerateFSBlend(const GGLState * gglCtx, const GGLPixelFormat format, /*const RegDesc * regDesc,*/
262                        IRBuilder<> & builder, Value * src, Value * dst)
263{
264   Type * const intType = builder.getInt32Ty();
265
266   // TODO cast the outputs pointer type to int for writing to minimize bandwidth
267   if (!gglCtx->blendState.enable) {
268//        if (regDesc->IsInt32Color())
269//        {
270//            debug_printf("GenerateFixedFS dst is already scalar fixed0 \n");
271//            src = builder.CreateExtractElement(src, builder.getInt32(0));
272//            src = builder.CreateBitCast(src, intType); // it's already RGBA int32
273//        }
274//        else if (regDesc->IsVectorType(Float))
275//        {
276      src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
277      src = builder.CreateFPToSI(src, intVecType(builder));
278      src = Saturate(builder, src);
279      src = IntVectorToScreenColor(builder, format, src);
280//        }
281//        else if (regDesc->IsVectorType(Fixed8))
282//        {
283//            src = builder.CreateBitCast(src, instr->GetIntVectorType());
284//            src = Saturate(instr, src);
285//            src = IntVectorToColor(instr, storage, src);
286//        }
287//        else if (regDesc->IsVectorType(Fixed16))
288//        {
289//            src = builder.CreateBitCast(src, instr->GetIntVectorType());
290//            src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
291//            src = Saturate(instr, src);
292//            src = IntVectorToColor(instr, storage, src);
293//        }
294//        else
295//            assert(0);
296      return src;
297   }
298   // blending, so convert src to <4 x i32>
299//    if (regDesc->IsInt32Color())
300//    {
301//        src = builder.CreateExtractElement(src, builder.getInt32(0));
302//        src = builder.CreateBitCast(src, intType); // it's already RGBA int32
303//
304//        Value * channels = Constant::getNullValue(instr->GetIntVectorType());
305//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(0));
306//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(1));
307//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(2));
308//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(3));
309//        channels = builder.CreateLShr(channels, constIntVec(builder,0, 8, 16, 24));
310//        channels = builder.CreateAnd(channels, constIntVec(builder,0xff, 0xff, 0xff, 0xff));
311//        src = channels;
312//    }
313//    else if (regDesc->IsVectorType(Fixed8)) // it's already int32x4 RGBA
314//        src = builder.CreateBitCast(src, instr->GetIntVectorType());
315//    else if (regDesc->IsVectorType(Fixed16))
316//    {
317//        src = builder.CreateBitCast(src, instr->GetIntVectorType());
318//        // TODO DXL consider shl dst by 8 and ashr by 16 in the end for more precision
319//        src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
320//    }
321//    else if (regDesc->IsVectorType(Float))
322//    {
323   src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
324   src = builder.CreateFPToSI(src, intVecType(builder));
325//    }
326//    else
327//        assert(0);
328
329   Value * const one = constIntVec(builder,255,255,255,255);
330   Value * const zero = constIntVec(builder,0,0,0,0);
331   Value * const sOne = builder.getInt32(255);
332   Value * const sZero = builder.getInt32(0);
333
334#if USE_LLVM_SCANLINE
335   Value * constant = constIntVec(builder,gglCtx->blendState.color[0],
336                                  gglCtx->blendState.color[1],
337                                  gglCtx->blendState.color[2],
338                                  gglCtx->blendState.color[3]);
339#else
340   Value * constant = NULL;
341   assert(0);
342#endif
343
344   Value * srcA = extractVector(builder,src)[3];
345   Value * dstA = extractVector(builder,dst)[3];
346   Value * constantA = extractVector(builder,constant)[3];
347
348   Value * sf = BlendFactor(gglCtx->blendState.scf, src, dst,
349                            constant, one, zero, srcA, dstA,
350                            constantA, sOne, true, builder);
351   if (gglCtx->blendState.scf != gglCtx->blendState.saf) {
352      Value * sfA = BlendFactor(gglCtx->blendState.saf, srcA, dstA,
353                                constantA, sOne, sZero, srcA, dstA,
354                                constantA, sOne, false, builder);
355      sf = builder.CreateInsertElement(sf, sfA, builder.getInt32(3),
356                                       name("sfAStore"));
357   }
358
359   Value * df = BlendFactor(gglCtx->blendState.dcf, src, dst,
360                            constant, one, zero, srcA, dstA,
361                            constantA, sOne, true, builder);
362   if (gglCtx->blendState.dcf != gglCtx->blendState.daf) {
363      Value * dfA = BlendFactor(gglCtx->blendState.daf, srcA, dstA,
364                                constantA, sOne, sZero, srcA, dstA,
365                                constantA, sOne, false, builder);
366      df = builder.CreateInsertElement(df, dfA, builder.getInt32(3),
367                                       name("dfAStore"));
368   }
369
370   // this is factor *= 256 / 255; factors have a chance of constant folding
371   sf = builder.CreateAdd(sf, builder.CreateLShr(sf, constIntVec(builder,7,7,7,7)));
372   df = builder.CreateAdd(df, builder.CreateLShr(df, constIntVec(builder,7,7,7,7)));
373
374   src = builder.CreateMul(src, sf);
375   dst = builder.CreateMul(dst, df);
376
377   Value * res = NULL;
378   switch (gglCtx->blendState.ce + GL_FUNC_ADD) {
379   case GL_FUNC_ADD:
380      res = builder.CreateAdd(src, dst);
381      break;
382   case GL_FUNC_SUBTRACT:
383      res = builder.CreateSub(src, dst);
384      break;
385   case GL_FUNC_REVERSE_SUBTRACT:
386      res = builder.CreateSub(dst, src);
387      break;
388   default:
389      assert(0);
390      break;
391   }
392   if (gglCtx->blendState.ce != gglCtx->blendState.ae) {
393      srcA = extractVector(builder,src)[3];
394      dstA = extractVector(builder,dst)[3];
395      Value * resA = NULL;
396      switch (gglCtx->blendState.ae + GL_FUNC_ADD) {
397      case GL_FUNC_ADD:
398         resA = builder.CreateAdd(srcA, dstA);
399         break;
400      case GL_FUNC_SUBTRACT:
401         resA = builder.CreateSub(srcA, dstA);
402         break;
403      case GL_FUNC_REVERSE_SUBTRACT:
404         resA = builder.CreateSub(dstA, srcA);
405         break;
406      default:
407         assert(0);
408         break;
409      }
410      res = builder.CreateInsertElement(res, resA, builder.getInt32(3),
411                                        name("resAStore"));
412   }
413
414   res = builder.CreateAShr(res, constIntVec(builder,8,8,8,8));
415   res = Saturate(builder, res);
416   res = IntVectorToScreenColor(builder, format, res);
417   return res;
418}
419
420static FunctionType * ScanLineFunctionType(IRBuilder<> & builder)
421{
422   std::vector<Type*> funcArgs;
423   VectorType * vectorType = floatVecType(builder);
424   PointerType * vectorPtr = PointerType::get(vectorType, 0);
425   Type * intType = builder.getInt32Ty();
426   PointerType * intPointerType = PointerType::get(intType, 0);
427   PointerType * bytePointerType = PointerType::get(builder.getInt8Ty(), 0);
428
429   funcArgs.push_back(vectorPtr); // start
430   funcArgs.push_back(vectorPtr); // step
431   funcArgs.push_back(vectorPtr); // constants
432   funcArgs.push_back(intPointerType); // frame
433   funcArgs.push_back(intPointerType); // depth
434   funcArgs.push_back(bytePointerType); // stencil
435   funcArgs.push_back(bytePointerType); // stencil state
436   funcArgs.push_back(intType); // count
437
438   FunctionType *functionType = FunctionType::get(/*Result=*/builder.getVoidTy(),
439                                                  llvm::ArrayRef<Type*>(funcArgs),
440                                                  /*isVarArg=*/false);
441
442   return functionType;
443}
444
445// generated scanline function parameters are VertexOutput * start, VertexOutput * step,
446// unsigned * frame, int * depth, unsigned char * stencil,
447// GGLActiveStencilState * stencilState, unsigned count
448void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program, Module * mod,
449                      const char * shaderName, const char * scanlineName)
450{
451   IRBuilder<> builder(mod->getContext());
452//   debug_printf("GenerateScanLine %s \n", scanlineName);
453
454   Type * intType = builder.getInt32Ty();
455   PointerType * intPointerType = PointerType::get(intType, 0);
456   Type * byteType = builder.getInt8Ty();
457   PointerType * bytePointerType = PointerType::get(byteType, 0);
458
459   Function * func = mod->getFunction(scanlineName);
460   if (func)
461      return;
462
463   func = llvm::cast<Function>(mod->getOrInsertFunction(scanlineName,
464                               ScanLineFunctionType(builder)));
465
466   BasicBlock *label_entry = BasicBlock::Create(builder.getContext(), "entry", func, 0);
467   builder.SetInsertPoint(label_entry);
468   CondBranch condBranch(builder);
469
470   Function::arg_iterator args = func->arg_begin();
471   Value * start = args++;
472   start->setName("start");
473   Value * step = args++;
474   step->setName("step");
475   Value * constants = args++;
476   constants->setName("constants");
477
478   // need alloc to be able to assign to it by using store
479   Value * framePtr = builder.CreateAlloca(intPointerType);
480   builder.CreateStore(args++, framePtr);
481   Value * depthPtr = builder.CreateAlloca(intPointerType);
482   builder.CreateStore(args++, depthPtr);
483   Value * stencilPtr = builder.CreateAlloca(bytePointerType);
484   builder.CreateStore(args++, stencilPtr);
485   Value * stencilState = args++;
486   stencilState->setName("stencilState");
487   Value * countPtr = builder.CreateAlloca(intType);
488   builder.CreateStore(args++, countPtr);
489
490   Value * sFace = NULL, * sRef = NULL, *sMask = NULL, * sFunc = NULL;
491   if (gglCtx->bufferState.stencilTest) {
492      sFace = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 0), "sFace");
493      if (gglCtx->frontStencil.ref == gglCtx->backStencil.ref)
494         sRef = builder.getInt8(gglCtx->frontStencil.ref);
495      else
496         sRef = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 1), "sRef");
497      if (gglCtx->frontStencil.mask == gglCtx->backStencil.mask)
498         sMask = builder.getInt8(gglCtx->frontStencil.mask);
499      else
500         sMask = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 2), "sMask");
501      if (gglCtx->frontStencil.func == gglCtx->backStencil.func)
502         sFunc = builder.getInt8(gglCtx->frontStencil.func);
503      else
504         sFunc = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 3), "sFunc");
505   }
506
507   condBranch.beginLoop(); // while (count > 0)
508
509   assert(framePtr && gglCtx);
510   // get values
511   Value * frame = NULL;
512   if (GGL_PIXEL_FORMAT_RGBA_8888 == gglCtx->bufferState.colorFormat)
513      frame = builder.CreateLoad(framePtr);
514   else if (GGL_PIXEL_FORMAT_RGB_565 == gglCtx->bufferState.colorFormat) {
515      frame = builder.CreateLoad(framePtr);
516      frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt16Ty(), 0));
517   } else if (GGL_PIXEL_FORMAT_UNKNOWN == gglCtx->bufferState.colorFormat)
518      frame = builder.CreateLoad(framePtr); // color buffer not set yet
519   else
520      assert(0);
521
522   frame->setName("frame");
523   Value * depth = NULL, * stencil = NULL;
524   if (gglCtx->bufferState.depthTest) {
525      assert(GGL_PIXEL_FORMAT_Z_32 == gglCtx->bufferState.depthFormat);
526      depth = builder.CreateLoad(depthPtr);
527      depth->setName("depth");
528   }
529
530   Value * count = builder.CreateLoad(countPtr);
531   count->setName("count");
532
533   Value * cmp = builder.CreateICmpEQ(count, builder.getInt32(0));
534   condBranch.ifCond(cmp, "if_break_loop"); // if (count == 0)
535   condBranch.brk(); // break;
536   condBranch.endif();
537
538   Value * sCmpPtr = NULL, * sCmp = NULL, * sPtr = NULL, * s = NULL;
539   if (gglCtx->bufferState.stencilTest) {
540      stencil = builder.CreateLoad(stencilPtr);
541      stencil->setName("stencil");
542
543      // temporaries to load/store value
544      sCmpPtr = builder.CreateAlloca(builder.getInt1Ty());
545      sCmpPtr->setName("sCmpPtr");
546      sPtr = builder.CreateAlloca(byteType);
547      sPtr->setName("sPtr");
548
549      s = builder.CreateLoad(stencil);
550      s = builder.CreateAnd(s, sMask);
551      builder.CreateStore(s, sPtr);
552
553      if (gglCtx->frontStencil.func != gglCtx->backStencil.func)
554         condBranch.ifCond(builder.CreateICmpEQ(sFace, builder.getInt8(0)));
555
556      StencilFunc(builder, gglCtx->frontStencil.func, s, sRef, sCmpPtr);
557
558      if (gglCtx->frontStencil.func != gglCtx->backStencil.func) {
559         condBranch.elseop();
560         StencilFunc(builder, gglCtx->backStencil.func, s, sRef, sCmpPtr);
561         condBranch.endif();
562      }
563
564      sCmp = builder.CreateLoad(sCmpPtr);
565   } else
566      sCmp = ConstantInt::getTrue(mod->getContext());
567   sCmp->setName("sCmp");
568
569   Value * depthZ = NULL, * zPtr = NULL, * z = NULL, * zCmp = NULL;
570   if (gglCtx->bufferState.depthTest) {
571      depthZ  = builder.CreateLoad(depth, "depthZ"); // z stored in buffer
572      zPtr = builder.CreateAlloca(intType); // temp store for modifying incoming z
573      zPtr->setName("zPtr");
574
575      // modified incoming z
576      z = builder.CreateBitCast(start, intPointerType);
577      z = builder.CreateConstInBoundsGEP1_32(z, (GGL_FS_INPUT_OFFSET +
578                                             GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
579      z = builder.CreateLoad(z, "z");
580
581      builder.CreateStore(z, zPtr);
582
583      Value * zNegative = builder.CreateICmpSLT(z, builder.getInt32(0));
584      condBranch.ifCond(zNegative);
585      // if (0x80000000 & z) z ^= 0x7fffffff since smaller -ve float means bigger -ve int
586      z = builder.CreateXor(z, builder.getInt32(0x7fffffff));
587      builder.CreateStore(z, zPtr);
588
589      condBranch.endif();
590
591      z = builder.CreateLoad(zPtr, "z");
592
593      switch (0x200 | gglCtx->bufferState.depthFunc) {
594      case GL_NEVER:
595         zCmp = ConstantInt::getFalse(mod->getContext());
596         break;
597      case GL_LESS:
598         zCmp = builder.CreateICmpSLT(z, depthZ);
599         break;
600      case GL_EQUAL:
601         zCmp = builder.CreateICmpEQ(z, depthZ);
602         break;
603      case GL_LEQUAL:
604         zCmp = builder.CreateICmpSLE(z, depthZ);
605         break;
606      case GL_GREATER:
607         zCmp = builder.CreateICmpSGT(z, depthZ);
608         break;
609      case GL_NOTEQUAL:
610         zCmp = builder.CreateICmpNE(z, depthZ);
611         break;
612      case GL_GEQUAL:
613         zCmp = builder.CreateICmpSGE(z, depthZ);
614         break;
615      case GL_ALWAYS:
616         zCmp = ConstantInt::getTrue(mod->getContext());
617         break;
618      default:
619         assert(0);
620         break;
621      }
622   } else // no depth test means always pass
623      zCmp = ConstantInt::getTrue(mod->getContext());
624   zCmp->setName("zCmp");
625
626   condBranch.ifCond(sCmp, "if_sCmp", "sCmp_fail");
627   condBranch.ifCond(zCmp, "if_zCmp", "zCmp_fail");
628
629   Value * inputs = start;
630   Value * outputs = start;
631
632   Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start,
633                       offsetof(VertexOutput,fragColor)/sizeof(Vector4));
634
635   Function * fsFunction = mod->getFunction(shaderName);
636   assert(fsFunction);
637   CallInst *call = builder.CreateCall3(fsFunction,inputs, outputs, constants);
638   call->setCallingConv(CallingConv::C);
639   call->setTailCall(false);
640
641   Value * dst = Constant::getNullValue(intVecType(builder));
642   if (gglCtx->blendState.enable && (0 != gglCtx->blendState.dcf || 0 != gglCtx->blendState.daf)) {
643      Value * frameColor = builder.CreateLoad(frame, "frameColor");
644      dst = ScreenColorToIntVector(builder, gglCtx->bufferState.colorFormat, frameColor);
645   }
646
647   Value * src = builder.CreateConstInBoundsGEP1_32(fsOutputs, 0);
648   src = builder.CreateLoad(src);
649
650   Value * color = GenerateFSBlend(gglCtx, gglCtx->bufferState.colorFormat,/*&prog->outputRegDesc,*/ builder, src, dst);
651   builder.CreateStore(color, frame);
652   // TODO DXL depthmask check
653   if (gglCtx->bufferState.depthTest) {
654      z = builder.CreateBitCast(z, intType);
655      builder.CreateStore(z, depth); // store z
656   }
657
658   if (gglCtx->bufferState.stencilTest)
659      builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dPass,
660                                    gglCtx->backStencil.dPass, sPtr, sRef), stencil);
661
662   condBranch.elseop(); // failed z test
663
664   if (gglCtx->bufferState.stencilTest)
665      builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dFail,
666                                    gglCtx->backStencil.dFail, sPtr, sRef), stencil);
667   condBranch.endif();
668   condBranch.elseop(); // failed s test
669
670   if (gglCtx->bufferState.stencilTest)
671      builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.sFail,
672                                    gglCtx->backStencil.sFail, sPtr, sRef), stencil);
673
674   condBranch.endif();
675   assert(frame);
676   frame = builder.CreateConstInBoundsGEP1_32(frame, 1); // frame++
677   // frame may have been casted to short* from int*, so cast back
678   frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt32Ty(), 0));
679   builder.CreateStore(frame, framePtr);
680   if (gglCtx->bufferState.depthTest) {
681      depth = builder.CreateConstInBoundsGEP1_32(depth, 1); // depth++
682      builder.CreateStore(depth, depthPtr);
683   }
684   if (gglCtx->bufferState.stencilTest) {
685      stencil = builder.CreateConstInBoundsGEP1_32(stencil, 1); // stencil++
686      builder.CreateStore(stencil, stencilPtr);
687   }
688   Value * vPtr = NULL, * v = NULL, * dx = NULL;
689   if (program->UsesFragCoord) {
690      vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
691             GGL_FS_INPUT_FRAGCOORD_INDEX);
692      v = builder.CreateLoad(vPtr);
693      dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
694                                              GGL_FS_INPUT_FRAGCOORD_INDEX);
695      dx = builder.CreateLoad(dx);
696      v = builder.CreateFAdd(v, dx);
697      builder.CreateStore(v, vPtr);
698   } else if (gglCtx->bufferState.depthTest) {
699      Type * floatType = builder.getFloatTy();
700      PointerType * floatPointerType = PointerType::get(floatType, 0);
701      vPtr = builder.CreateBitCast(start, floatPointerType);
702      vPtr = builder.CreateConstInBoundsGEP1_32(vPtr,
703             (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
704      v = builder.CreateLoad(vPtr);
705      dx = builder.CreateBitCast(step, floatPointerType);
706      dx = builder.CreateConstInBoundsGEP1_32(dx,
707                                              (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
708      dx = builder.CreateLoad(dx);
709      v = builder.CreateFAdd(v, dx);
710      builder.CreateStore(v, vPtr);
711   }
712
713   if (program->UsesPointCoord) {
714      vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
715             GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
716      v = builder.CreateLoad(vPtr);
717      dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
718                                              GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
719      dx = builder.CreateLoad(dx);
720      v = builder.CreateFAdd(v, dx);
721      builder.CreateStore(v, vPtr);
722   }
723
724   for (unsigned i = 0; i < program->VaryingSlots; ++i) {
725      vPtr = builder.CreateConstInBoundsGEP1_32(start, offsetof(VertexOutput,varyings)/sizeof(Vector4) + i);
726      v = builder.CreateLoad(vPtr);
727      dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
728                                              GGL_FS_INPUT_VARYINGS_INDEX + i);
729      dx = builder.CreateLoad(dx);
730      v = builder.CreateFAdd(v, dx);
731      builder.CreateStore(v, vPtr);
732   }
733
734   count = builder.CreateSub(count, builder.getInt32(1));
735   builder.CreateStore(count, countPtr); // count--;
736
737   condBranch.endLoop();
738
739   builder.CreateRetVoid();
740}
741