1/* libs/opengles/primitives.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <math.h>
21
22#include "context.h"
23#include "primitives.h"
24#include "light.h"
25#include "matrix.h"
26#include "vertex.h"
27#include "fp.h"
28#include "TextureObjectManager.h"
29
30extern "C" void iterators0032(const void* that,
31        int32_t* it, int32_t c0, int32_t c1, int32_t c2);
32
33namespace android {
34
35// ----------------------------------------------------------------------------
36
37static void primitive_point(ogles_context_t* c, vertex_t* v);
38static void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
39static void primitive_clip_triangle(ogles_context_t* c,
40        vertex_t* v0, vertex_t* v1, vertex_t* v2);
41
42static void primitive_nop_point(ogles_context_t* c, vertex_t* v);
43static void primitive_nop_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
44static void primitive_nop_triangle(ogles_context_t* c,
45        vertex_t* v0, vertex_t* v1, vertex_t* v2);
46
47static inline bool cull_triangle(ogles_context_t* c,
48        vertex_t* v0, vertex_t* v1, vertex_t* v2);
49
50static void lerp_triangle(ogles_context_t* c,
51        vertex_t* v0, vertex_t* v1, vertex_t* v2);
52
53static void lerp_texcoords(ogles_context_t* c,
54        vertex_t* v0, vertex_t* v1, vertex_t* v2);
55
56static void lerp_texcoords_w(ogles_context_t* c,
57        vertex_t* v0, vertex_t* v1, vertex_t* v2);
58
59static void triangle(ogles_context_t* c,
60        vertex_t* v0, vertex_t* v1, vertex_t* v2);
61
62static void clip_triangle(ogles_context_t* c,
63        vertex_t* v0, vertex_t* v1, vertex_t* v2);
64
65static unsigned int clip_line(ogles_context_t* c,
66        vertex_t* s, vertex_t* p);
67
68// ----------------------------------------------------------------------------
69#if 0
70#pragma mark -
71#endif
72
73static void lightTriangleDarkSmooth(ogles_context_t* c,
74        vertex_t* v0, vertex_t* v1, vertex_t* v2)
75{
76    if (!(v0->flags & vertex_t::LIT)) {
77        v0->flags |= vertex_t::LIT;
78        const GLvoid* cp = c->arrays.color.element(
79                v0->index & vertex_cache_t::INDEX_MASK);
80        c->arrays.color.fetch(c, v0->color.v, cp);
81    }
82    if (!(v1->flags & vertex_t::LIT)) {
83        v1->flags |= vertex_t::LIT;
84        const GLvoid* cp = c->arrays.color.element(
85                v1->index & vertex_cache_t::INDEX_MASK);
86        c->arrays.color.fetch(c, v1->color.v, cp);
87    }
88    if(!(v2->flags & vertex_t::LIT)) {
89        v2->flags |= vertex_t::LIT;
90        const GLvoid* cp = c->arrays.color.element(
91                v2->index & vertex_cache_t::INDEX_MASK);
92        c->arrays.color.fetch(c, v2->color.v, cp);
93    }
94}
95
96static void lightTriangleDarkFlat(ogles_context_t* c,
97        vertex_t* v0, vertex_t* v1, vertex_t* v2)
98{
99    if (!(v2->flags & vertex_t::LIT)) {
100        v2->flags |= vertex_t::LIT;
101        const GLvoid* cp = c->arrays.color.element(
102                v2->index & vertex_cache_t::INDEX_MASK);
103        c->arrays.color.fetch(c, v2->color.v, cp);
104    }
105    // configure the rasterizer here, before we clip
106    c->rasterizer.procs.color4xv(c, v2->color.v);
107}
108
109static void lightTriangleSmooth(ogles_context_t* c,
110        vertex_t* v0, vertex_t* v1, vertex_t* v2)
111{
112    if (!(v0->flags & vertex_t::LIT))
113        c->lighting.lightVertex(c, v0);
114    if (!(v1->flags & vertex_t::LIT))
115        c->lighting.lightVertex(c, v1);
116    if(!(v2->flags & vertex_t::LIT))
117        c->lighting.lightVertex(c, v2);
118}
119
120static void lightTriangleFlat(ogles_context_t* c,
121        vertex_t* v0, vertex_t* v1, vertex_t* v2)
122{
123    if (!(v2->flags & vertex_t::LIT))
124        c->lighting.lightVertex(c, v2);
125    // configure the rasterizer here, before we clip
126    c->rasterizer.procs.color4xv(c, v2->color.v);
127}
128
129// The fog versions...
130
131static inline
132void lightVertexDarkSmoothFog(ogles_context_t* c, vertex_t* v)
133{
134    if (!(v->flags & vertex_t::LIT)) {
135        v->flags |= vertex_t::LIT;
136        v->fog = c->fog.fog(c, v->eye.z);
137        const GLvoid* cp = c->arrays.color.element(
138                v->index & vertex_cache_t::INDEX_MASK);
139        c->arrays.color.fetch(c, v->color.v, cp);
140    }
141}
142static inline
143void lightVertexDarkFlatFog(ogles_context_t* c, vertex_t* v)
144{
145    if (!(v->flags & vertex_t::LIT)) {
146        v->flags |= vertex_t::LIT;
147        v->fog = c->fog.fog(c, v->eye.z);
148    }
149}
150static inline
151void lightVertexSmoothFog(ogles_context_t* c, vertex_t* v)
152{
153    if (!(v->flags & vertex_t::LIT)) {
154        v->fog = c->fog.fog(c, v->eye.z);
155        c->lighting.lightVertex(c, v);
156    }
157}
158
159static void lightTriangleDarkSmoothFog(ogles_context_t* c,
160        vertex_t* v0, vertex_t* v1, vertex_t* v2)
161{
162    lightVertexDarkSmoothFog(c, v0);
163    lightVertexDarkSmoothFog(c, v1);
164    lightVertexDarkSmoothFog(c, v2);
165}
166
167static void lightTriangleDarkFlatFog(ogles_context_t* c,
168        vertex_t* v0, vertex_t* v1, vertex_t* v2)
169{
170    lightVertexDarkFlatFog(c, v0);
171    lightVertexDarkFlatFog(c, v1);
172    lightVertexDarkSmoothFog(c, v2);
173    // configure the rasterizer here, before we clip
174    c->rasterizer.procs.color4xv(c, v2->color.v);
175}
176
177static void lightTriangleSmoothFog(ogles_context_t* c,
178        vertex_t* v0, vertex_t* v1, vertex_t* v2)
179{
180    lightVertexSmoothFog(c, v0);
181    lightVertexSmoothFog(c, v1);
182    lightVertexSmoothFog(c, v2);
183}
184
185static void lightTriangleFlatFog(ogles_context_t* c,
186        vertex_t* v0, vertex_t* v1, vertex_t* v2)
187{
188    lightVertexDarkFlatFog(c, v0);
189    lightVertexDarkFlatFog(c, v1);
190    lightVertexSmoothFog(c, v2);
191    // configure the rasterizer here, before we clip
192    c->rasterizer.procs.color4xv(c, v2->color.v);
193}
194
195
196
197typedef void (*light_primitive_t)(ogles_context_t*,
198        vertex_t*, vertex_t*, vertex_t*);
199
200// fog 0x4, light 0x2, smooth 0x1
201static const light_primitive_t lightPrimitive[8] = {
202    lightTriangleDarkFlat,          // no fog | dark  | flat
203    lightTriangleDarkSmooth,        // no fog | dark  | smooth
204    lightTriangleFlat,              // no fog | light | flat
205    lightTriangleSmooth,            // no fog | light | smooth
206    lightTriangleDarkFlatFog,       // fog    | dark  | flat
207    lightTriangleDarkSmoothFog,     // fog    | dark  | smooth
208    lightTriangleFlatFog,           // fog    | light | flat
209    lightTriangleSmoothFog          // fog    | light | smooth
210};
211
212void ogles_validate_primitives(ogles_context_t* c)
213{
214    const uint32_t enables = c->rasterizer.state.enables;
215
216    // set up the lighting/shading/smoothing/fogging function
217    int index = enables & GGL_ENABLE_SMOOTH ? 0x1 : 0;
218    index |= c->lighting.enable ? 0x2 : 0;
219    index |= enables & GGL_ENABLE_FOG ? 0x4 : 0;
220    c->lighting.lightTriangle = lightPrimitive[index];
221
222    // set up the primitive renderers
223    if (ggl_likely(c->arrays.vertex.enable)) {
224        c->prims.renderPoint    = primitive_point;
225        c->prims.renderLine     = primitive_line;
226        c->prims.renderTriangle = primitive_clip_triangle;
227    } else {
228        c->prims.renderPoint    = primitive_nop_point;
229        c->prims.renderLine     = primitive_nop_line;
230        c->prims.renderTriangle = primitive_nop_triangle;
231    }
232}
233
234// ----------------------------------------------------------------------------
235
236void compute_iterators_t::initTriangle(
237        vertex_t const* v0, vertex_t const* v1, vertex_t const* v2)
238{
239    m_dx01 = v1->window.x - v0->window.x;
240    m_dy10 = v0->window.y - v1->window.y;
241    m_dx20 = v0->window.x - v2->window.x;
242    m_dy02 = v2->window.y - v0->window.y;
243    m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
244}
245
246void compute_iterators_t::initLine(
247        vertex_t const* v0, vertex_t const* v1)
248{
249    m_dx01 = m_dy02 = v1->window.x - v0->window.x;
250    m_dy10 = m_dx20 = v0->window.y - v1->window.y;
251    m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
252}
253
254void compute_iterators_t::initLerp(vertex_t const* v0, uint32_t enables)
255{
256    m_x0 = v0->window.x;
257    m_y0 = v0->window.y;
258    const GGLcoord area = (m_area + TRI_HALF) >> TRI_FRACTION_BITS;
259    const GGLcoord minArea = 2; // cannot be inverted
260    // triangles with an area smaller than 1.0 are not smooth-shaded
261
262    int q=0, s=0, d=0;
263    if (abs(area) >= minArea) {
264        // Here we do some voodoo magic, to compute a suitable scale
265        // factor for deltas/area:
266
267        // First compute the 1/area with full 32-bits precision,
268        // gglRecipQNormalized returns a number [-0.5, 0.5[ and an exponent.
269        d = gglRecipQNormalized(area, &q);
270
271        // Then compute the minimum left-shift to not overflow the muls
272        // below.
273        s = 32 - gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
274
275        // We'll keep 16-bits of precision for deltas/area. So we need
276        // to shift everything left an extra 15 bits.
277        s += 15;
278
279        // make sure all final shifts are not > 32, because gglMulx
280        // can't handle it.
281        if (s < q) s = q;
282        if (s > 32) {
283            d >>= 32-s;
284            s = 32;
285        }
286    }
287
288    m_dx01 = gglMulx(m_dx01, d, s);
289    m_dy10 = gglMulx(m_dy10, d, s);
290    m_dx20 = gglMulx(m_dx20, d, s);
291    m_dy02 = gglMulx(m_dy02, d, s);
292    m_area_scale = 32 + q - s;
293    m_scale = 0;
294
295    if (enables & GGL_ENABLE_TMUS) {
296        const int A = gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
297        const int B = gglClz(abs(m_x0)|abs(m_y0));
298        m_scale = max(0, 32 - (A + 16)) +
299                  max(0, 32 - (B + TRI_FRACTION_BITS)) + 1;
300    }
301}
302
303int compute_iterators_t::iteratorsScale(GGLfixed* it,
304        int32_t c0, int32_t c1, int32_t c2) const
305{
306    int32_t dc01 = c1 - c0;
307    int32_t dc02 = c2 - c0;
308    const int A = gglClz(abs(c0));
309    const int B = gglClz(abs(dc01)|abs(dc02));
310    const int scale = min(A, B - m_scale) - 2;
311    if (scale >= 0) {
312        c0   <<= scale;
313        dc01 <<= scale;
314        dc02 <<= scale;
315    } else {
316        c0   >>= -scale;
317        dc01 >>= -scale;
318        dc02 >>= -scale;
319    }
320    const int s = m_area_scale;
321    int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
322    int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
323    int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
324            gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
325    it[0] = c;
326    it[1] = dcdx;
327    it[2] = dcdy;
328    return scale;
329}
330
331void compute_iterators_t::iterators1616(GGLfixed* it,
332        GGLfixed c0, GGLfixed c1, GGLfixed c2) const
333{
334    const GGLfixed dc01 = c1 - c0;
335    const GGLfixed dc02 = c2 - c0;
336    // 16.16 x 16.16 == 32.32 --> 16.16
337    const int s = m_area_scale;
338    int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
339    int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
340    int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
341            gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
342    it[0] = c;
343    it[1] = dcdx;
344    it[2] = dcdy;
345}
346
347void compute_iterators_t::iterators0032(int64_t* it,
348        int32_t c0, int32_t c1, int32_t c2) const
349{
350    const int s = m_area_scale - 16;
351    int32_t dc01 = (c1 - c0)>>s;
352    int32_t dc02 = (c2 - c0)>>s;
353    // 16.16 x 16.16 == 32.32
354    int64_t dcdx = gglMulii(dc01, m_dy02) + gglMulii(dc02, m_dy10);
355    int64_t dcdy = gglMulii(dc02, m_dx01) + gglMulii(dc01, m_dx20);
356    it[ 0] = (c0<<16) - ((dcdx*m_x0 + dcdy*m_y0)>>4);
357    it[ 1] = dcdx;
358    it[ 2] = dcdy;
359}
360
361#if defined(__arm__) && !defined(__thumb__)
362inline void compute_iterators_t::iterators0032(int32_t* it,
363        int32_t c0, int32_t c1, int32_t c2) const
364{
365    ::iterators0032(this, it, c0, c1, c2);
366}
367#else
368void compute_iterators_t::iterators0032(int32_t* it,
369        int32_t c0, int32_t c1, int32_t c2) const
370{
371    int64_t it64[3];
372    iterators0032(it64, c0, c1, c2);
373    it[0] = it64[0];
374    it[1] = it64[1];
375    it[2] = it64[2];
376}
377#endif
378
379// ----------------------------------------------------------------------------
380
381static inline int32_t clampZ(GLfixed z) CONST;
382int32_t clampZ(GLfixed z) {
383    z = (z & ~(z>>31));
384    if (z >= 0x10000)
385        z = 0xFFFF;
386    return z;
387}
388
389static __attribute__((noinline))
390void fetch_texcoord_impl(ogles_context_t* c,
391        vertex_t* v0, vertex_t* v1, vertex_t* v2)
392{
393    vertex_t* const vtx[3] = { v0, v1, v2 };
394    array_t const * const texcoordArray = c->arrays.texture;
395
396    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
397        if (!(c->rasterizer.state.texture[i].enable))
398            continue;
399
400        for (int j=0 ; j<3 ; j++) {
401            vertex_t* const v = vtx[j];
402            if (v->flags & vertex_t::TT)
403                continue;
404
405            // NOTE: here we could compute automatic texgen
406            // such as sphere/cube maps, instead of fetching them
407            // from the textcoord array.
408
409            vec4_t& coords = v->texture[i];
410            const GLubyte* tp = texcoordArray[i].element(
411                    v->index & vertex_cache_t::INDEX_MASK);
412            texcoordArray[i].fetch(c, coords.v, tp);
413
414            // transform texture coordinates...
415            coords.Q = 0x10000;
416            const transform_t& tr = c->transforms.texture[i].transform;
417            if (ggl_unlikely(tr.ops)) {
418                c->arrays.tex_transform[i](&tr, &coords, &coords);
419            }
420
421            // divide by Q
422            const GGLfixed q = coords.Q;
423            if (ggl_unlikely(q != 0x10000)) {
424                const int32_t qinv = gglRecip28(q);
425                coords.S = gglMulx(coords.S, qinv, 28);
426                coords.T = gglMulx(coords.T, qinv, 28);
427            }
428        }
429    }
430    v0->flags |= vertex_t::TT;
431    v1->flags |= vertex_t::TT;
432    v2->flags |= vertex_t::TT;
433}
434
435inline void fetch_texcoord(ogles_context_t* c,
436        vertex_t* v0, vertex_t* v1, vertex_t* v2)
437{
438    const uint32_t enables = c->rasterizer.state.enables;
439    if (!(enables & GGL_ENABLE_TMUS))
440        return;
441
442    // Fetch & transform texture coordinates...
443    if (ggl_likely(v0->flags & v1->flags & v2->flags & vertex_t::TT)) {
444        // already done for all three vertices, bail...
445        return;
446    }
447    fetch_texcoord_impl(c, v0, v1, v2);
448}
449
450// ----------------------------------------------------------------------------
451#if 0
452#pragma mark -
453#pragma mark Point
454#endif
455
456void primitive_nop_point(ogles_context_t*, vertex_t*) {
457}
458
459void primitive_point(ogles_context_t* c, vertex_t* v)
460{
461    // lighting & clamping...
462    const uint32_t enables = c->rasterizer.state.enables;
463
464    if (ggl_unlikely(!(v->flags & vertex_t::LIT))) {
465        if (c->lighting.enable) {
466            c->lighting.lightVertex(c, v);
467        } else {
468            v->flags |= vertex_t::LIT;
469            const GLvoid* cp = c->arrays.color.element(
470                    v->index & vertex_cache_t::INDEX_MASK);
471            c->arrays.color.fetch(c, v->color.v, cp);
472        }
473        if (enables & GGL_ENABLE_FOG) {
474            v->fog = c->fog.fog(c, v->eye.z);
475        }
476    }
477
478    // XXX: we don't need to do that each-time
479    // if color array and lighting not enabled
480    c->rasterizer.procs.color4xv(c, v->color.v);
481
482    // XXX: look into ES point-sprite extension
483    if (enables & GGL_ENABLE_TMUS) {
484        fetch_texcoord(c, v,v,v);
485        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
486            if (!c->rasterizer.state.texture[i].enable)
487                continue;
488            int32_t itt[8];
489            itt[1] = itt[2] = itt[4] = itt[5] = 0;
490            itt[6] = itt[7] = 16; // XXX: check that
491            if (c->rasterizer.state.texture[i].s_wrap == GGL_CLAMP) {
492                int width = c->textures.tmu[i].texture->surface.width;
493                itt[0] = v->texture[i].S * width;
494                itt[6] = 0;
495            }
496            if (c->rasterizer.state.texture[i].t_wrap == GGL_CLAMP) {
497                int height = c->textures.tmu[i].texture->surface.height;
498                itt[3] = v->texture[i].T * height;
499                itt[7] = 0;
500            }
501            c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
502        }
503    }
504
505    if (enables & GGL_ENABLE_DEPTH_TEST) {
506        int32_t itz[3];
507        itz[0] = clampZ(v->window.z) * 0x00010001;
508        itz[1] = itz[2] = 0;
509        c->rasterizer.procs.zGrad3xv(c, itz);
510    }
511
512    if (enables & GGL_ENABLE_FOG) {
513        GLfixed itf[3];
514        itf[0] = v->fog;
515        itf[1] = itf[2] = 0;
516        c->rasterizer.procs.fogGrad3xv(c, itf);
517    }
518
519    // Render our point...
520    c->rasterizer.procs.pointx(c, v->window.v, c->point.size);
521}
522
523// ----------------------------------------------------------------------------
524#if 0
525#pragma mark -
526#pragma mark Line
527#endif
528
529void primitive_nop_line(ogles_context_t*, vertex_t*, vertex_t*) {
530}
531
532void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1)
533{
534    // get texture coordinates
535    fetch_texcoord(c, v0, v1, v1);
536
537    // light/shade the vertices first (they're copied below)
538    c->lighting.lightTriangle(c, v0, v1, v1);
539
540    // clip the line if needed
541    if (ggl_unlikely((v0->flags | v1->flags) & vertex_t::CLIP_ALL)) {
542        unsigned int count = clip_line(c, v0, v1);
543        if (ggl_unlikely(count == 0))
544            return;
545    }
546
547    // compute iterators...
548    const uint32_t enables = c->rasterizer.state.enables;
549    const uint32_t mask =   GGL_ENABLE_TMUS |
550                            GGL_ENABLE_SMOOTH |
551                            GGL_ENABLE_W |
552                            GGL_ENABLE_FOG |
553                            GGL_ENABLE_DEPTH_TEST;
554
555    if (ggl_unlikely(enables & mask)) {
556        c->lerp.initLine(v0, v1);
557        lerp_triangle(c, v0, v1, v0);
558    }
559
560    // render our line
561    c->rasterizer.procs.linex(c, v0->window.v, v1->window.v, c->line.width);
562}
563
564// ----------------------------------------------------------------------------
565#if 0
566#pragma mark -
567#pragma mark Triangle
568#endif
569
570void primitive_nop_triangle(ogles_context_t* c,
571        vertex_t* v0, vertex_t* v1, vertex_t* v2) {
572}
573
574void primitive_clip_triangle(ogles_context_t* c,
575        vertex_t* v0, vertex_t* v1, vertex_t* v2)
576{
577    uint32_t cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
578    if (ggl_likely(!cc)) {
579        // code below must be as optimized as possible, this is the
580        // common code path.
581
582        // This triangle is not clipped, test if it's culled
583        // unclipped triangle...
584        c->lerp.initTriangle(v0, v1, v2);
585        if (cull_triangle(c, v0, v1, v2))
586            return; // culled!
587
588        // Fetch all texture coordinates if needed
589        fetch_texcoord(c, v0, v1, v2);
590
591        // light (or shade) our triangle!
592        c->lighting.lightTriangle(c, v0, v1, v2);
593
594        triangle(c, v0, v1, v2);
595        return;
596    }
597
598    // The assumption here is that we're not going to clip very often,
599    // and even more rarely will we clip a triangle that ends up
600    // being culled out. So it's okay to light the vertices here, even though
601    // in a few cases we won't render the triangle (if culled).
602
603    // Fetch texture coordinates...
604    fetch_texcoord(c, v0, v1, v2);
605
606    // light (or shade) our triangle!
607    c->lighting.lightTriangle(c, v0, v1, v2);
608
609    clip_triangle(c, v0, v1, v2);
610}
611
612// -----------------------------------------------------------------------
613
614void triangle(ogles_context_t* c,
615        vertex_t* v0, vertex_t* v1, vertex_t* v2)
616{
617    // compute iterators...
618    const uint32_t enables = c->rasterizer.state.enables;
619    const uint32_t mask =   GGL_ENABLE_TMUS |
620                            GGL_ENABLE_SMOOTH |
621                            GGL_ENABLE_W |
622                            GGL_ENABLE_FOG |
623                            GGL_ENABLE_DEPTH_TEST;
624
625    if (ggl_likely(enables & mask))
626        lerp_triangle(c, v0, v1, v2);
627
628    c->rasterizer.procs.trianglex(c, v0->window.v, v1->window.v, v2->window.v);
629}
630
631void lerp_triangle(ogles_context_t* c,
632        vertex_t* v0, vertex_t* v1, vertex_t* v2)
633{
634    const uint32_t enables = c->rasterizer.state.enables;
635    c->lerp.initLerp(v0, enables);
636
637    // set up texture iterators
638    if (enables & GGL_ENABLE_TMUS) {
639        if (enables & GGL_ENABLE_W) {
640            lerp_texcoords_w(c, v0, v1, v2);
641        } else {
642            lerp_texcoords(c, v0, v1, v2);
643        }
644    }
645
646    // set up the color iterators
647    const compute_iterators_t& lerp = c->lerp;
648    if (enables & GGL_ENABLE_SMOOTH) {
649        GLfixed itc[12];
650        for (int i=0 ; i<4 ; i++) {
651            const GGLcolor c0 = v0->color.v[i] * 255;
652            const GGLcolor c1 = v1->color.v[i] * 255;
653            const GGLcolor c2 = v2->color.v[i] * 255;
654            lerp.iterators1616(&itc[i*3], c0, c1, c2);
655        }
656        c->rasterizer.procs.colorGrad12xv(c, itc);
657    }
658
659    if (enables & GGL_ENABLE_DEPTH_TEST) {
660        int32_t itz[3];
661        const int32_t v0z = clampZ(v0->window.z);
662        const int32_t v1z = clampZ(v1->window.z);
663        const int32_t v2z = clampZ(v2->window.z);
664        if (ggl_unlikely(c->polygonOffset.enable)) {
665            const int32_t units = (c->polygonOffset.units << 16);
666            const GLfixed factor = c->polygonOffset.factor;
667            if (factor) {
668                int64_t itz64[3];
669                lerp.iterators0032(itz64, v0z, v1z, v2z);
670                int64_t maxDepthSlope = max(itz64[1], itz64[2]);
671                itz[0] = uint32_t(itz64[0])
672                        + uint32_t((maxDepthSlope*factor)>>16) + units;
673                itz[1] = uint32_t(itz64[1]);
674                itz[2] = uint32_t(itz64[2]);
675            } else {
676                lerp.iterators0032(itz, v0z, v1z, v2z);
677                itz[0] += units;
678            }
679        } else {
680            lerp.iterators0032(itz, v0z, v1z, v2z);
681        }
682        c->rasterizer.procs.zGrad3xv(c, itz);
683    }
684
685    if (ggl_unlikely(enables & GGL_ENABLE_FOG)) {
686        GLfixed itf[3];
687        lerp.iterators1616(itf, v0->fog, v1->fog, v2->fog);
688        c->rasterizer.procs.fogGrad3xv(c, itf);
689    }
690}
691
692
693static inline
694int compute_lod(ogles_context_t* c, int i,
695        int32_t s0, int32_t t0, int32_t s1, int32_t t1, int32_t s2, int32_t t2)
696{
697    // Compute mipmap level / primitive
698    // rho = sqrt( texelArea / area )
699    // lod = log2( rho )
700    // lod = log2( texelArea / area ) / 2
701    // lod = (log2( texelArea ) - log2( area )) / 2
702    const compute_iterators_t& lerp = c->lerp;
703    const GGLcoord area = abs(lerp.area());
704    const int w = c->textures.tmu[i].texture->surface.width;
705    const int h = c->textures.tmu[i].texture->surface.height;
706    const int shift = 16 + (16 - TRI_FRACTION_BITS);
707    int32_t texelArea = abs( gglMulx(s1-s0, t2-t0, shift) -
708            gglMulx(s2-s0, t1-t0, shift) )*w*h;
709    int log2TArea = (32-TRI_FRACTION_BITS  -1) - gglClz(texelArea);
710    int log2Area  = (32-TRI_FRACTION_BITS*2-1) - gglClz(area);
711    int lod = (log2TArea - log2Area + 1) >> 1;
712    return lod;
713}
714
715void lerp_texcoords(ogles_context_t* c,
716        vertex_t* v0, vertex_t* v1, vertex_t* v2)
717{
718    const compute_iterators_t& lerp = c->lerp;
719    int32_t itt[8] __attribute__((aligned(16)));
720    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
721        const texture_t& tmu = c->rasterizer.state.texture[i];
722        if (!tmu.enable)
723            continue;
724
725        // compute the jacobians using block floating-point
726        int32_t s0 = v0->texture[i].S;
727        int32_t t0 = v0->texture[i].T;
728        int32_t s1 = v1->texture[i].S;
729        int32_t t1 = v1->texture[i].T;
730        int32_t s2 = v2->texture[i].S;
731        int32_t t2 = v2->texture[i].T;
732
733        const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
734        if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
735            int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
736            c->rasterizer.procs.bindTextureLod(c, i,
737                    &c->textures.tmu[i].texture->mip(lod));
738        }
739
740        // premultiply (s,t) when clampling
741        if (tmu.s_wrap == GGL_CLAMP) {
742            const int width = tmu.surface.width;
743            s0 *= width;
744            s1 *= width;
745            s2 *= width;
746        }
747        if (tmu.t_wrap == GGL_CLAMP) {
748            const int height = tmu.surface.height;
749            t0 *= height;
750            t1 *= height;
751            t2 *= height;
752        }
753        itt[6] = -lerp.iteratorsScale(itt+0, s0, s1, s2);
754        itt[7] = -lerp.iteratorsScale(itt+3, t0, t1, t2);
755        c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
756    }
757}
758
759void lerp_texcoords_w(ogles_context_t* c,
760        vertex_t* v0, vertex_t* v1, vertex_t* v2)
761{
762    const compute_iterators_t& lerp = c->lerp;
763    int32_t itt[8] __attribute__((aligned(16)));
764    int32_t itw[3];
765
766    // compute W's scale to 2.30
767    int32_t w0 = v0->window.w;
768    int32_t w1 = v1->window.w;
769    int32_t w2 = v2->window.w;
770    int wscale = 32 - gglClz(w0|w1|w2);
771
772    // compute the jacobian using block floating-point
773    int sc = lerp.iteratorsScale(itw, w0, w1, w2);
774    sc +=  wscale - 16;
775    c->rasterizer.procs.wGrad3xv(c, itw);
776
777    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
778        const texture_t& tmu = c->rasterizer.state.texture[i];
779        if (!tmu.enable)
780            continue;
781
782        // compute the jacobians using block floating-point
783        int32_t s0 = v0->texture[i].S;
784        int32_t t0 = v0->texture[i].T;
785        int32_t s1 = v1->texture[i].S;
786        int32_t t1 = v1->texture[i].T;
787        int32_t s2 = v2->texture[i].S;
788        int32_t t2 = v2->texture[i].T;
789
790        const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
791        if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
792            int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
793            c->rasterizer.procs.bindTextureLod(c, i,
794                    &c->textures.tmu[i].texture->mip(lod));
795        }
796
797        // premultiply (s,t) when clampling
798        if (tmu.s_wrap == GGL_CLAMP) {
799            const int width = tmu.surface.width;
800            s0 *= width;
801            s1 *= width;
802            s2 *= width;
803        }
804        if (tmu.t_wrap == GGL_CLAMP) {
805            const int height = tmu.surface.height;
806            t0 *= height;
807            t1 *= height;
808            t2 *= height;
809        }
810
811        s0 = gglMulx(s0, w0, wscale);
812        t0 = gglMulx(t0, w0, wscale);
813        s1 = gglMulx(s1, w1, wscale);
814        t1 = gglMulx(t1, w1, wscale);
815        s2 = gglMulx(s2, w2, wscale);
816        t2 = gglMulx(t2, w2, wscale);
817
818        itt[6] = sc - lerp.iteratorsScale(itt+0, s0, s1, s2);
819        itt[7] = sc - lerp.iteratorsScale(itt+3, t0, t1, t2);
820        c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
821    }
822}
823
824
825static inline
826bool cull_triangle(ogles_context_t* c, vertex_t* v0, vertex_t* v1, vertex_t* v2)
827{
828    if (ggl_likely(c->cull.enable)) {
829        const GLenum winding = (c->lerp.area() > 0) ? GL_CW : GL_CCW;
830        const GLenum face = (winding == c->cull.frontFace) ? GL_FRONT : GL_BACK;
831        if (face == c->cull.cullFace)
832            return true; // culled!
833    }
834    return false;
835}
836
837static inline
838GLfixed frustumPlaneDist(int plane, const vec4_t& s)
839{
840    const GLfixed d = s.v[ plane >> 1 ];
841    return  ((plane & 1) ? (s.w - d) : (s.w + d));
842}
843
844static inline
845int32_t clipDivide(GLfixed a, GLfixed b) {
846    // returns a 4.28 fixed-point
847    return gglMulDivi(1LU<<28, a, b);
848}
849
850void clip_triangle(ogles_context_t* c,
851        vertex_t* v0, vertex_t* v1, vertex_t* v2)
852{
853    uint32_t all_cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
854
855    vertex_t *p0, *p1, *p2;
856    const int MAX_CLIPPING_PLANES = 6 + OGLES_MAX_CLIP_PLANES;
857    const int MAX_VERTICES = 3;
858
859    // Temporary buffer to hold the new vertices. Each plane can add up to
860    // two new vertices (because the polygon is convex).
861    // We need one extra element, to handle an overflow case when
862    // the polygon degenerates into something non convex.
863    vertex_t buffer[MAX_CLIPPING_PLANES * 2 + 1];   // ~3KB
864    vertex_t* buf = buffer;
865
866    // original list of vertices (polygon to clip, in fact this
867    // function works with an arbitrary polygon).
868    vertex_t* in[3] = { v0, v1, v2 };
869
870    // output lists (we need 2, which we use back and forth)
871    // (maximum outpout list's size is MAX_CLIPPING_PLANES + MAX_VERTICES)
872    // 2 more elements for overflow when non convex polygons.
873    vertex_t* out[2][MAX_CLIPPING_PLANES + MAX_VERTICES + 2];
874    unsigned int outi = 0;
875
876    // current input list
877    vertex_t** ivl = in;
878
879    // 3 input vertices, 0 in the output list, first plane
880    unsigned int ic = 3;
881
882    // User clip-planes first, the clipping is always done in eye-coordinate
883    // this is basically the same algorithm than for the view-volume
884    // clipping, except for the computation of the distance (vertex, plane)
885    // and the fact that we need to compute the eye-coordinates of each
886    // new vertex we create.
887
888    if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
889    {
890        unsigned int plane = 0;
891        uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
892        do {
893            if (cc & 1) {
894                // pointers to our output list (head and current)
895                vertex_t** const ovl = &out[outi][0];
896                vertex_t** output = ovl;
897                unsigned int oc = 0;
898                unsigned int sentinel = 0;
899                // previous vertex, compute distance to the plane
900                vertex_t* s = ivl[ic-1];
901                const vec4_t& equation = c->clipPlanes.plane[plane].equation;
902                GLfixed sd = dot4(equation.v, s->eye.v);
903                // clip each vertex against this plane...
904                for (unsigned int i=0 ; i<ic ; i++) {
905                    vertex_t* p = ivl[i];
906                    const GLfixed pd = dot4(equation.v, p->eye.v);
907                    if (sd >= 0) {
908                        if (pd >= 0) {
909                            // both inside
910                            *output++ = p;
911                            oc++;
912                        } else {
913                            // s inside, p outside (exiting)
914                            const GLfixed t = clipDivide(sd, sd-pd);
915                            c->arrays.clipEye(c, buf, t, p, s);
916                            *output++ = buf++;
917                            oc++;
918                            if (++sentinel >= 3)
919                                return; // non-convex polygon!
920                        }
921                    } else {
922                        if (pd >= 0) {
923                            // s outside (entering)
924                            if (pd) {
925                                const GLfixed t = clipDivide(pd, pd-sd);
926                                c->arrays.clipEye(c, buf, t, s, p);
927                                *output++ = buf++;
928                                oc++;
929                                if (++sentinel >= 3)
930                                    return; // non-convex polygon!
931                            }
932                            *output++ = p;
933                            oc++;
934                        } else {
935                           // both outside
936                        }
937                    }
938                    s = p;
939                    sd = pd;
940                }
941                // output list become the new input list
942                if (oc<3)
943                    return; // less than 3 vertices left? we're done!
944                ivl = ovl;
945                ic = oc;
946                outi = 1-outi;
947            }
948            cc >>= 1;
949            plane++;
950        } while (cc);
951    }
952
953    // frustum clip-planes
954    if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
955    {
956        unsigned int plane = 0;
957        uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
958        do {
959            if (cc & 1) {
960                // pointers to our output list (head and current)
961                vertex_t** const ovl = &out[outi][0];
962                vertex_t** output = ovl;
963                unsigned int oc = 0;
964                unsigned int sentinel = 0;
965                // previous vertex, compute distance to the plane
966                vertex_t* s = ivl[ic-1];
967                GLfixed sd = frustumPlaneDist(plane, s->clip);
968                // clip each vertex against this plane...
969                for (unsigned int i=0 ; i<ic ; i++) {
970                    vertex_t* p = ivl[i];
971                    const GLfixed pd = frustumPlaneDist(plane, p->clip);
972                    if (sd >= 0) {
973                        if (pd >= 0) {
974                            // both inside
975                            *output++ = p;
976                            oc++;
977                        } else {
978                            // s inside, p outside (exiting)
979                            const GLfixed t = clipDivide(sd, sd-pd);
980                            c->arrays.clipVertex(c, buf, t, p, s);
981                            *output++ = buf++;
982                            oc++;
983                            if (++sentinel >= 3)
984                                return; // non-convex polygon!
985                        }
986                    } else {
987                        if (pd >= 0) {
988                            // s outside (entering)
989                            if (pd) {
990                                const GLfixed t = clipDivide(pd, pd-sd);
991                                c->arrays.clipVertex(c, buf, t, s, p);
992                                *output++ = buf++;
993                                oc++;
994                                if (++sentinel >= 3)
995                                    return; // non-convex polygon!
996                            }
997                            *output++ = p;
998                            oc++;
999                        } else {
1000                           // both outside
1001                        }
1002                    }
1003                    s = p;
1004                    sd = pd;
1005                }
1006                // output list become the new input list
1007                if (oc<3)
1008                    return; // less than 3 vertices left? we're done!
1009                ivl = ovl;
1010                ic = oc;
1011                outi = 1-outi;
1012            }
1013            cc >>= 1;
1014            plane++;
1015        } while (cc);
1016    }
1017
1018    // finally we can render our triangles...
1019    p0 = ivl[0];
1020    p1 = ivl[1];
1021    for (unsigned int i=2 ; i<ic ; i++) {
1022        p2 = ivl[i];
1023        c->lerp.initTriangle(p0, p1, p2);
1024        if (cull_triangle(c, p0, p1, p2)) {
1025            p1 = p2;
1026            continue; // culled!
1027        }
1028        triangle(c, p0, p1, p2);
1029        p1 = p2;
1030    }
1031}
1032
1033unsigned int clip_line(ogles_context_t* c, vertex_t* s, vertex_t* p)
1034{
1035    const uint32_t all_cc = (s->flags | p->flags) & vertex_t::CLIP_ALL;
1036
1037    if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
1038    {
1039        unsigned int plane = 0;
1040        uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
1041        do {
1042            if (cc & 1) {
1043                const vec4_t& equation = c->clipPlanes.plane[plane].equation;
1044                const GLfixed sd = dot4(equation.v, s->eye.v);
1045                const GLfixed pd = dot4(equation.v, p->eye.v);
1046                if (sd >= 0) {
1047                    if (pd >= 0) {
1048                        // both inside
1049                    } else {
1050                        // s inside, p outside (exiting)
1051                        const GLfixed t = clipDivide(sd, sd-pd);
1052                        c->arrays.clipEye(c, p, t, p, s);
1053                    }
1054                } else {
1055                    if (pd >= 0) {
1056                        // s outside (entering)
1057                        if (pd) {
1058                            const GLfixed t = clipDivide(pd, pd-sd);
1059                            c->arrays.clipEye(c, s, t, s, p);
1060                        }
1061                    } else {
1062                       // both outside
1063                       return 0;
1064                    }
1065                }
1066            }
1067            cc >>= 1;
1068            plane++;
1069        } while (cc);
1070    }
1071
1072    // frustum clip-planes
1073    if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
1074    {
1075        unsigned int plane = 0;
1076        uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
1077        do {
1078            if (cc & 1) {
1079                const GLfixed sd = frustumPlaneDist(plane, s->clip);
1080                const GLfixed pd = frustumPlaneDist(plane, p->clip);
1081                if (sd >= 0) {
1082                    if (pd >= 0) {
1083                        // both inside
1084                    } else {
1085                        // s inside, p outside (exiting)
1086                        const GLfixed t = clipDivide(sd, sd-pd);
1087                        c->arrays.clipVertex(c, p, t, p, s);
1088                    }
1089                } else {
1090                    if (pd >= 0) {
1091                        // s outside (entering)
1092                        if (pd) {
1093                            const GLfixed t = clipDivide(pd, pd-sd);
1094                            c->arrays.clipVertex(c, s, t, s, p);
1095                        }
1096                    } else {
1097                       // both outside
1098                       return 0;
1099                    }
1100                }
1101            }
1102            cc >>= 1;
1103            plane++;
1104        } while (cc);
1105    }
1106
1107    return 2;
1108}
1109
1110
1111}; // namespace android
1112