scanline.cpp revision 470970d77c095678830fc512dfe0e97c6bcab15b
1/**
2 **
3 ** Copyright 2010, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18#include <assert.h>
19#include <stdio.h>
20#include <string.h>
21
22#include "src/pixelflinger2/pixelflinger2.h"
23#include "src/pixelflinger2/texture.h"
24#include "src/mesa/main/mtypes.h"
25
26#if !USE_LLVM_SCANLINE
27
28static void Saturate(Vec4<BlendComp_t> * color)
29{
30   color->r = MIN2(MAX2(color->r, 0), 255);
31   color->g = MIN2(MAX2(color->g, 0), 255);
32   color->b = MIN2(MAX2(color->b, 0), 255);
33   color->a = MIN2(MAX2(color->a, 0), 255);
34}
35
36static inline void RGBAIntToRGBAIntx4(unsigned rgba, Vec4<BlendComp_t> * color) __attribute__((always_inline));
37static inline void RGBAIntToRGBAIntx4(unsigned rgba, Vec4<BlendComp_t> * color)
38{
39   color->r = rgba & 0xff;
40   color->g = (rgba >>= 8) & 0xff;
41   color->b = (rgba >>= 8) & 0xff;
42   color->a = (rgba >>= 8);
43}
44
45static inline void RGBAFloatx4ToRGBAIntx4(Vector4 * v, Vec4<BlendComp_t> * color)
46{
47   color->r = v->r * 255;
48   color->g = v->g * 255;
49   color->b = v->b * 255;
50   color->a = v->a * 255;
51}
52
53static inline unsigned RGBAIntx4ToRGBAInt(const Vec4<BlendComp_t> * color);
54static inline unsigned RGBAIntx4ToRGBAInt(const Vec4<BlendComp_t> * color)
55{
56   return color->r | (color->g << 8) | (color->b << 16) | (color->a << 24);
57}
58
59
60
61//static inline Pixel Vector4ToPixelRGBA(const Vector4 * color) __attribute__((always_inline));
62//static inline Pixel Vector4ToPixelRGBA(const Vector4 * color)
63//{
64//    Pixel pixel;
65//#if defined(__ARM_HAVE_NEON) && USE_NEON
66//    int32x4_t  c = vcvtq_s32_f32(vmulq_n_f32(color->f4, 255.0f));
67//    c = vminq_s32(c, vdupq_n_s32(255));
68//    c = vmaxq_s32(c, vdupq_n_s32(0));
69//    pixel.channels[0] = (unsigned char)vgetq_lane_s32(c, 0);
70//    pixel.channels[1] = (unsigned char)vgetq_lane_s32(c, 1);
71//    pixel.channels[2] = (unsigned char)vgetq_lane_s32(c, 2);
72//    pixel.channels[3] = (unsigned char)vgetq_lane_s32(c, 3);
73//#else
74//    pixel.channels[0] = (unsigned char)MIN2(MAX2((short)(color->r * 255), 0), 255);
75//	pixel.channels[1] = (unsigned char)MIN2(MAX2((short)(color->g * 255), 0), 255);
76//	pixel.channels[2] = (unsigned char)MIN2(MAX2((short)(color->b * 255), 0), 255);
77//	pixel.channels[3] = (unsigned char)MIN2(MAX2((short)(color->a * 255), 0), 255);
78//#endif //#if USE_FIXED_POINT
79//	return pixel;
80//}
81
82template<typename T>
83static inline void BlendFactor(const unsigned mode, T & factor, const T & src,
84                               const T & dst, const T & constant, const T & one,
85                               const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA,
86                               const BlendComp_t & constantA, const BlendComp_t & sOne) __attribute__((always_inline));
87template<typename T>
88static inline void BlendFactor(const unsigned mode, T & factor, const T & src,
89                               const T & dst, const T & constant, const T & one,
90                               const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA,
91                               const BlendComp_t & constantA, const BlendComp_t & sOne)
92{
93   switch (mode) {
94   case 0: // GL_ZERO
95      factor = zero;
96      return;
97   case 1: // GL_ONE
98      factor = one;
99      return;
100   case 2: // GL_SRC_COLOR:
101      factor = src;
102      return;
103   case 3: // GL_ONE_MINUS_SRC_COLOR:
104      factor = one;
105      factor -= src;
106      return;
107   case 4: // GL_DST_COLOR:
108      factor = dst;
109      return;
110   case 5: // GL_ONE_MINUS_DST_COLOR:
111      factor = one;
112      factor -= dst;
113      return;
114   case 6: // GL_SRC_ALPHA:
115      factor = srcA;
116      return;
117   case 7: // GL_ONE_MINUS_SRC_ALPHA:
118      factor = sOne - srcA;
119      return;
120   case 8: // GL_DST_ALPHA:
121      factor = dstA;
122      return;
123   case 9: // GL_ONE_MINUS_DST_ALPHA:
124      factor = sOne - dstA;
125      return;
126   case 10: // GL_SRC_ALPHA_SATURATE: // valid only for source color; src alpha = 1
127      factor = MIN2(srcA, sOne - dstA);
128      return;
129   case 11: // GL_CONSTANT_COLOR:
130      factor = constant;
131      return;
132   case 12: // GL_ONE_MINUS_CONSTANT_COLOR:
133      factor = one;
134      factor -= constant;
135      return;
136   case 13: // GL_CONSTANT_ALPHA:
137      factor = constantA;
138      return;
139   case 14: // GL_ONE_MINUS_CONSTANT_ALPHA:
140      factor = sOne - constantA;
141      return;
142   default:
143      assert(0);
144      return;
145   }
146}
147#endif // #if !USE_LLVM_SCANLINE
148
/**
 * Applies a stencil operation to a stencil value.
 *
 * @param op   operation selector: 0 zero, 1 keep, 2 replace, 3 increment
 *             (clamped at 255), 4 decrement (clamped at 0), 5 bitwise invert,
 *             6 increment with wrap-around, 7 decrement with wrap-around.
 * @param s    current stencil value.
 * @param ref  reference value, used only by "replace".
 * @return the new stencil value; for an unknown op, assert(0) fires and,
 *         if asserts are compiled out, s is returned unchanged.
 */
unsigned char StencilOp(const unsigned op, unsigned char s, const unsigned char ref)
{
   // Start from "keep"; the clamped INCR/DECR cases rely on this default.
   unsigned char result = s;

   switch (op) {
   case 0: // GL_ZERO
      result = 0;
      break;
   case 1: // GL_KEEP
      break;
   case 2: // GL_REPLACE
      result = ref;
      break;
   case 3: // GL_INCR
      if (s != 255)
         result = (unsigned char)(s + 1);
      break;
   case 4: // GL_DECR
      if (s != 0)
         result = (unsigned char)(s - 1);
      break;
   case 5: // GL_INVERT
      result = (unsigned char)~s;
      break;
   case 6: // GL_INCR_WRAP
      result = (unsigned char)(s + 1); // 255 wraps to 0
      break;
   case 7: // GL_DECR_WRAP
      result = (unsigned char)(s - 1); // 0 wraps to 255
      break;
   default:
      assert(0);
      break;
   }

   return result;
}
177
178template <bool StencilTest, bool DepthTest, bool DepthWrite, bool BlendEnable>
179void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexOutput * v2)
180{
181   GGL_GET_CONST_CONTEXT(ctx, iface);
182   //    assert((unsigned)v1->position.y == (unsigned)v2->position.y);
183   //
184   //    assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format);
185   //    assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format);
186   //    assert(ctx->frameSurface.width == ctx->depthSurface.width);
187   //    assert(ctx->frameSurface.height == ctx->depthSurface.height);
188
189   const unsigned int varyingCount = ctx->glCtx->CurrentProgram->VaryingSlots;
190   const unsigned y = v1->position.y, startX = v1->position.x,
191                      endX = v2->position.x;
192
193   //assert(ctx->frameSurface.width > startX && ctx->frameSurface.width > endX);
194   //assert(ctx->frameSurface.height > y);
195
196   unsigned * frame = (unsigned *)ctx->frameSurface.data
197                      + y * ctx->frameSurface.width + startX;
198   const VectorComp_t div = VectorComp_t_CTR(1 / (float)(endX - startX));
199
200   //memcpy(ctx->glCtx->CurrentProgram->ValuesVertexOutput, v1, sizeof(*v1));
201   // shader symbols are mapped to gl_shader_program_Values*
202   //VertexOutput & vertex(*(VertexOutput*)ctx->glCtx->CurrentProgram->ValuesVertexOutput);
203   VertexOutput vertex(*v1);
204   VertexOutput vertexDx(*v2);
205
206   vertexDx.position -= v1->position;
207   vertexDx.position *= div;
208   //printf("vertexDx.position.z=%.8g \n", vertexDx.position.z);
209   for (unsigned i = 0; i < varyingCount; i++) {
210      vertexDx.varyings[i] -= v1->varyings[i];
211      vertexDx.varyings[i] *= div;
212   }
213   vertexDx.frontFacingPointCoord -= v1->frontFacingPointCoord;
214   vertexDx.frontFacingPointCoord *= div; // gl_PointCoord, only zw
215   vertexDx.frontFacingPointCoord.y = 0; // gl_FrontFacing not interpolated
216
217#if USE_FORCED_FIXEDPOINT
218   for (unsigned j = 0; j < 4; j++) {
219      for (unsigned i = 0; i < varyingCount; i++) {
220         vertex.varyings[i].i[j] = vertex.varyings[i].f[j] * 65536;
221         vertexDx.varyings[i].i[j] = vertexDx.varyings[i].f[j] * 65536;
222      }
223      vertex.position.i[j] = vertex.position.f[j] * 65536;
224      vertexDx.position.i[j] = vertexDx.position.f[j] * 65536;
225      vertex.frontFacingPointCoord.i[j] = vertex.frontFacingPointCoord.f[j] * 65536;
226   }
227#endif
228
229   int * depth = (int *)ctx->depthSurface.data + y * ctx->frameSurface.width + startX;
230   unsigned char * stencil = (unsigned char *)ctx->stencilSurface.data + y * ctx->frameSurface.width + startX;
231
232#if !USE_LLVM_TEXTURE_SAMPLER
233   extern const GGLContext * textureGGLContext;
234   textureGGLContext = ctx;
235#endif
236
237   // TODO DXL consider inverting gl_FragCoord.y
238
239#if USE_LLVM_SCANLINE
240   typedef void (* ScanLineFunction_t)(VertexOutput * start, VertexOutput * step, float (*constants)[4],
241                                       unsigned * frame, int * depth, unsigned char * stencil,
242                                       GGLActiveStencilState *, unsigned count);
243
244   ScanLineFunction_t scanLineFunction = (ScanLineFunction_t)
245                                         ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function;
246   if (endX >= startX) {
247      scanLineFunction(&vertex, &vertexDx, ctx->glCtx->CurrentProgram->ValuesUniform, frame, depth, stencil, &ctx->activeStencil, endX - startX + 1);
248   }
249#else
250
251   int z;
252   bool sCmp = true; // default passed, unless failed by stencil test
253   unsigned char s; // masked stored stencil value
254   const unsigned char sMask = ctx->activeStencil.mask;
255   const unsigned char sRef = ctx->activeStencil.ref;
256   const unsigned sFunc = ctx->activeStencil.face ? 0x200 | ctx->backStencil.func :
257                          0x200 | ctx->frontStencil.func;
258   const unsigned ssFail = ctx->activeStencil.face ? ctx->backStencil.sFail :
259                           ctx->frontStencil.sFail;
260   const unsigned sdFail = ctx->activeStencil.face ? ctx->backStencil.dFail :
261                           ctx->frontStencil.dFail;
262   const unsigned sdPass = ctx->activeStencil.face ? ctx->backStencil.dPass :
263                           ctx->frontStencil.dPass;
264
265   for (unsigned x = startX; x <= endX; x++) {
266      //assert(abs((int)(vertex.position.x) - (int)x) < 2);
267      //assert((unsigned)vertex.position.y == y);
268      if (StencilTest) {
269         s = *stencil & sMask;
270         switch (sFunc) {
271         case GL_NEVER:
272            sCmp = false;
273            break;
274         case GL_LESS:
275            sCmp = sRef < s;
276            break;
277         case GL_EQUAL:
278            sCmp = sRef == s;
279            break;
280         case GL_LEQUAL:
281            sCmp = sRef <= s;
282            break;
283         case GL_GREATER:
284            sCmp = sRef > s;
285            break;
286         case GL_NOTEQUAL:
287            sCmp = sRef != s;
288            break;
289         case GL_GEQUAL:
290            sCmp = sRef >= s;
291            break;
292         case GL_ALWAYS:
293            sCmp = true;
294            break;
295         default:
296            assert(0);
297            break;
298         }
299      }
300
301      if (!StencilTest || sCmp) {
302         z = vertex.position.i[2];
303         if (z & 0x80000000)  // negative float has leading 1
304            z ^= 0x7fffffff;  // bigger negative is smaller
305         bool zCmp = true;
306         if (DepthTest) {
307            switch (0x200 | ctx->bufferState.depthFunc) {
308            case GL_NEVER:
309               zCmp = false;
310               break;
311            case GL_LESS:
312               zCmp = z < *depth;
313               break;
314            case GL_EQUAL:
315               zCmp = z == *depth;
316               break;
317            case GL_LEQUAL:
318               zCmp = z <= *depth;
319               break;
320            case GL_GREATER:
321               zCmp = z > *depth;
322               break;
323            case GL_NOTEQUAL:
324               zCmp = z != *depth;
325               break;
326            case GL_GEQUAL:
327               zCmp = z >= *depth;
328               break;
329            case GL_ALWAYS:
330               zCmp = true;
331               break;
332            default:
333               assert(0);
334               break;
335            }
336         }
337         if (!DepthTest || zCmp) {
338            float * varying = (float *)ctx->glCtx->CurrentProgram->ValuesVertexOutput;
339            ShaderFunction_t function = (ShaderFunction_t)ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function;
340            function(&vertex, &vertex, ctx->glCtx->CurrentProgram->ValuesUniform);
341            //ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function();
342            if (BlendEnable) {
343               BlendComp_t sOne = 255, sZero = 0;
344               Vec4<BlendComp_t> one = sOne, zero = sZero;
345
346               Vec4<BlendComp_t> src;
347//                    if (outputRegDesc.IsInt32Color())
348//                        RGBAIntToRGBAIntx4(vertex.fragColor[0].u[0], &src);
349//                    else if (outputRegDesc.IsVectorType(Float))
350               RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src);
351//                    else if (outputRegDesc.IsVectorType(Fixed8))
352//                    {
353//                        src.u[0] = vertex.fragColor[0].u[0];
354//                        src.u[1] = vertex.fragColor[0].u[1];
355//                        src.u[2] = vertex.fragColor[0].u[2];
356//                        src.u[3] = vertex.fragColor[0].u[3];
357//                    }
358//                    else
359//                        assert(0);
360
361               Vec4<BlendComp_t> dst;
362               unsigned dc = *frame;
363               dst.r = dc & 255;
364               dst.g = (dc >>= 8) & 255;
365               dst.b = (dc >>= 8) & 255;
366               dst.a = (dc >>= 8) & 255;
367
368               Vec4<BlendComp_t> sf, df;
369               Vec4<BlendComp_t> blendStateColor(ctx->blendState.color[0], ctx->blendState.color[1],
370                                                 ctx->blendState.color[2], ctx->blendState.color[3]);
371
372               BlendFactor(ctx->blendState.scf, sf, src, dst,
373                           blendStateColor, one, zero, src.a, dst.a,
374                           blendStateColor.a, sOne);
375               if (ctx->blendState.scf != ctx->blendState.saf)
376                  BlendFactor(ctx->blendState.saf, sf.a, src.a, dst.a,
377                              blendStateColor.a, sOne, sZero, src.a, dst.a,
378                              blendStateColor.a, sOne);
379               BlendFactor(ctx->blendState.dcf, df, src, dst,
380                           blendStateColor, one, zero, src.a, dst.a,
381                           blendStateColor.a, sOne);
382               if (ctx->blendState.dcf != ctx->blendState.daf)
383                  BlendFactor(ctx->blendState.daf, df.a, src.a, dst.a,
384                              blendStateColor.a, sOne, sZero, src.a, dst.a,
385                              blendStateColor.a, sOne);
386
387               Vec4<BlendComp_t> sfs(sf), dfs(df);
388               sfs.LShr(7);
389               sf += sfs;
390               dfs.LShr(7);
391               df += dfs;
392
393               src *= sf;
394               dst *= df;
395               Vec4<BlendComp_t> res(src);
396               switch (ctx->blendState.ce + GL_FUNC_ADD) {
397               case GL_FUNC_ADD:
398                  res += dst;
399                  break;
400               case GL_FUNC_SUBTRACT:
401                  res -= dst;
402                  break;
403               case GL_FUNC_REVERSE_SUBTRACT:
404                  res = dst;
405                  res -= src;
406                  break;
407               default:
408                  assert(0);
409                  break;
410               }
411               if (ctx->blendState.ce != ctx->blendState.ae)
412                  switch (ctx->blendState.ce + GL_FUNC_ADD) {
413                  case GL_FUNC_ADD:
414                     res.a = src.a + dst.a;
415                     break;
416                  case GL_FUNC_SUBTRACT:
417                     res.a = src.a - dst.a;
418                     break;
419                  case GL_FUNC_REVERSE_SUBTRACT:
420                     res.a = dst.a - src.a;
421                     break;
422                  default:
423                     assert(0);
424                     break;
425                  }
426
427               res.AShr(8);
428               Saturate(&res);
429               *frame = RGBAIntx4ToRGBAInt(&res);
430            } else {
431//                    if (outputRegDesc.IsInt32Color())
432//                        *frame = vertex.fragColor[0].u[0];
433//                    else if (outputRegDesc.IsVectorType(Float))
434               {
435                  Vec4<BlendComp_t> src;
436                  RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src);
437                  Saturate(&src);
438                  *frame = RGBAIntx4ToRGBAInt(&src);
439               }
440//                    else if (outputRegDesc.IsVectorType(Fixed16))
441//                    {
442//                        Vec4<BlendComp_t> & src = (Vec4<BlendComp_t> &)vertex.fragColor[0];
443//                        src.r = (src.r * 255 >> 16);
444//                        src.g = (src.g * 255 >> 16);
445//                        src.b = (src.b * 255 >> 16);
446//                        src.a = (src.a * 255 >> 16);
447//                        Saturate(&src);
448//                        *frame = RGBAIntx4ToRGBAInt(&src);
449//                    }
450//                    else if (outputRegDesc.IsVectorType(Fixed8))
451//                    {
452//                        Vec4<BlendComp_t> & src = (Vec4<BlendComp_t> &)vertex.fragColor[0];
453//                        Saturate(&src);
454//                        *frame = RGBAIntx4ToRGBAInt(&src);
455//                    }
456//                    else
457//                        assert(0);
458            }
459
460            if (DepthWrite)
461               *depth = z;
462            if (StencilTest)
463               *stencil = StencilOp(sdPass, s, sRef);
464         } else if (StencilTest)
465            *stencil = StencilOp(sdFail, s, sRef);
466      } else if (StencilTest)
467         *stencil = StencilOp(ssFail, s, sRef);
468
469      frame++;
470      depth++;
471      stencil++;
472
473#if USE_FORCED_FIXEDPOINT
474      for (unsigned j = 0; j < 4; j++) {
475         if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord)
476            vertex.position.i[j] += vertexDx.position.i[j];
477         for (unsigned i = 0; i < varyingCount; i++)
478            vertex.varyings[i].i[j] += vertexDx.varyings[i].i[j];
479      }
480      vertex.position.i[2] += vertexDx.position.i[2];
481      if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) {
482         vertex.frontFacingPointCoord.i[2] = vertexDx.frontFacingPointCoord.i[2];
483         vertex.frontFacingPointCoord.i[3] = vertexDx.frontFacingPointCoord.i[3];
484      }
485#else
486   if (ctx->glCtx->CurrentProgram->UsesFragCoord)
487      vertex.position += vertexDx.position;
488   else if (ctx->bufferState.depthTest)
489      vertex.position.z += vertexDx.position.z;
490
491   for (unsigned i = 0; i < varyingCount; i++)
492      vertex.varyings[i] += vertexDx.varyings[i];
493   if (ctx->glCtx->CurrentProgram->UsesPointCoord) {
494      vertex.frontFacingPointCoord.z += vertexDx.frontFacingPointCoord.z;
495      vertex.frontFacingPointCoord.w += vertexDx.frontFacingPointCoord.w;
496   }
497#endif // #if USE_FORCED_FIXEDPOINT
498   }
499
500#endif // #if USE_LLVM_SCANLINE
501
502#if !USE_LLVM_TEXTURE_SAMPLER
503   textureGGLContext = NULL;
504#endif
505}
506
507static void PickScanLine(GGLInterface * iface)
508{
509   GGL_GET_CONTEXT(ctx, iface);
510
511   ctx->interface.ScanLine = NULL;
512   if (ctx->bufferState.stencilTest) {
513      if (ctx->bufferState.depthTest) {
514         if (ctx->blendState.enable)
515            ctx->interface.ScanLine = ScanLine<true, true, true, true>;
516         else
517            ctx->interface.ScanLine = ScanLine<true, true, true, false>;
518      } else {
519         if (ctx->blendState.enable)
520            ctx->interface.ScanLine = ScanLine<true, false, false, true>;
521         else
522            ctx->interface.ScanLine = ScanLine<true, false, false, false>;
523      }
524   } else {
525      if (ctx->bufferState.depthTest) {
526         if (ctx->blendState.enable)
527            ctx->interface.ScanLine = ScanLine<false, true, true, true>;
528         else
529            ctx->interface.ScanLine = ScanLine<false, true, true, false>;
530      } else {
531         if (ctx->blendState.enable)
532            ctx->interface.ScanLine = ScanLine<false, false, false, true>;
533         else
534            ctx->interface.ScanLine = ScanLine<false, false, false, false>;
535      }
536   }
537
538   assert(ctx->interface.ScanLine);
539}
540
541void InitializeScanLineFunctions(GGLInterface * iface)
542{
543   GGL_GET_CONTEXT(ctx, iface);
544   ctx->PickScanLine = PickScanLine;
545}
546