1/* libs/opengles/primitives.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <math.h>
21
22#include "context.h"
23#include "primitives.h"
24#include "light.h"
25#include "matrix.h"
26#include "vertex.h"
27#include "fp.h"
28#include "TextureObjectManager.h"
29
// iterators0032() with C linkage; it is not defined in the part of the file
// visible here. The ARM (non-Thumb) build of
// compute_iterators_t::iterators0032(int32_t*, ...) forwards to it, passing
// the compute_iterators_t object as 'that'.
extern "C" void iterators0032(const void* that,
        int32_t* it, int32_t c0, int32_t c1, int32_t c2);
32
33namespace android {
34
35// ----------------------------------------------------------------------------
36
// Forward declarations of the file-local helpers defined further down.

// Primitive entry points installed by ogles_validate_primitives() when the
// vertex array is enabled.
static void primitive_point(ogles_context_t* c, vertex_t* v);
static void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
static void primitive_clip_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Do-nothing entry points installed when the vertex array is disabled.
static void primitive_nop_point(ogles_context_t* c, vertex_t* v);
static void primitive_nop_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
static void primitive_nop_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Returns true when the triangle must be discarded by face culling.
static inline bool cull_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Sets up the texture/color/depth/fog iterators of the rasterizer.
static void lerp_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Texture-coordinate iterators, used when GGL_ENABLE_W is not set.
static void lerp_texcoords(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Texture-coordinate iterators, used when GGL_ENABLE_W is set.
static void lerp_texcoords_w(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Renders a triangle that needs no clipping.
static void triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Clips a triangle against the enabled planes.
static void clip_triangle(ogles_context_t* c,
        vertex_t* v0, vertex_t* v1, vertex_t* v2);

// Clips a line; primitive_line() drops the line when this returns 0.
static unsigned int clip_line(ogles_context_t* c,
        vertex_t* s, vertex_t* p);
67
68// ----------------------------------------------------------------------------
69#if 0
70#pragma mark -
71#endif
72
73static void lightTriangleDarkSmooth(ogles_context_t* c,
74        vertex_t* v0, vertex_t* v1, vertex_t* v2)
75{
76    if (!(v0->flags & vertex_t::LIT)) {
77        v0->flags |= vertex_t::LIT;
78        const GLvoid* cp = c->arrays.color.element(
79                v0->index & vertex_cache_t::INDEX_MASK);
80        c->arrays.color.fetch(c, v0->color.v, cp);
81    }
82    if (!(v1->flags & vertex_t::LIT)) {
83        v1->flags |= vertex_t::LIT;
84        const GLvoid* cp = c->arrays.color.element(
85                v1->index & vertex_cache_t::INDEX_MASK);
86        c->arrays.color.fetch(c, v1->color.v, cp);
87    }
88    if(!(v2->flags & vertex_t::LIT)) {
89        v2->flags |= vertex_t::LIT;
90        const GLvoid* cp = c->arrays.color.element(
91                v2->index & vertex_cache_t::INDEX_MASK);
92        c->arrays.color.fetch(c, v2->color.v, cp);
93    }
94}
95
96static void lightTriangleDarkFlat(ogles_context_t* c,
97        vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
98{
99    if (!(v2->flags & vertex_t::LIT)) {
100        v2->flags |= vertex_t::LIT;
101        const GLvoid* cp = c->arrays.color.element(
102                v2->index & vertex_cache_t::INDEX_MASK);
103        c->arrays.color.fetch(c, v2->color.v, cp);
104    }
105    // configure the rasterizer here, before we clip
106    c->rasterizer.procs.color4xv(c, v2->color.v);
107}
108
109static void lightTriangleSmooth(ogles_context_t* c,
110        vertex_t* v0, vertex_t* v1, vertex_t* v2)
111{
112    if (!(v0->flags & vertex_t::LIT))
113        c->lighting.lightVertex(c, v0);
114    if (!(v1->flags & vertex_t::LIT))
115        c->lighting.lightVertex(c, v1);
116    if(!(v2->flags & vertex_t::LIT))
117        c->lighting.lightVertex(c, v2);
118}
119
120static void lightTriangleFlat(ogles_context_t* c,
121        vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* v2)
122{
123    if (!(v2->flags & vertex_t::LIT))
124        c->lighting.lightVertex(c, v2);
125    // configure the rasterizer here, before we clip
126    c->rasterizer.procs.color4xv(c, v2->color.v);
127}
128
129// The fog versions...
130
131static inline
132void lightVertexDarkSmoothFog(ogles_context_t* c, vertex_t* v)
133{
134    if (!(v->flags & vertex_t::LIT)) {
135        v->flags |= vertex_t::LIT;
136        v->fog = c->fog.fog(c, v->eye.z);
137        const GLvoid* cp = c->arrays.color.element(
138                v->index & vertex_cache_t::INDEX_MASK);
139        c->arrays.color.fetch(c, v->color.v, cp);
140    }
141}
142static inline
143void lightVertexDarkFlatFog(ogles_context_t* c, vertex_t* v)
144{
145    if (!(v->flags & vertex_t::LIT)) {
146        v->flags |= vertex_t::LIT;
147        v->fog = c->fog.fog(c, v->eye.z);
148    }
149}
150static inline
151void lightVertexSmoothFog(ogles_context_t* c, vertex_t* v)
152{
153    if (!(v->flags & vertex_t::LIT)) {
154        v->fog = c->fog.fog(c, v->eye.z);
155        c->lighting.lightVertex(c, v);
156    }
157}
158
159static void lightTriangleDarkSmoothFog(ogles_context_t* c,
160        vertex_t* v0, vertex_t* v1, vertex_t* v2)
161{
162    lightVertexDarkSmoothFog(c, v0);
163    lightVertexDarkSmoothFog(c, v1);
164    lightVertexDarkSmoothFog(c, v2);
165}
166
167static void lightTriangleDarkFlatFog(ogles_context_t* c,
168        vertex_t* v0, vertex_t* v1, vertex_t* v2)
169{
170    lightVertexDarkFlatFog(c, v0);
171    lightVertexDarkFlatFog(c, v1);
172    lightVertexDarkSmoothFog(c, v2);
173    // configure the rasterizer here, before we clip
174    c->rasterizer.procs.color4xv(c, v2->color.v);
175}
176
177static void lightTriangleSmoothFog(ogles_context_t* c,
178        vertex_t* v0, vertex_t* v1, vertex_t* v2)
179{
180    lightVertexSmoothFog(c, v0);
181    lightVertexSmoothFog(c, v1);
182    lightVertexSmoothFog(c, v2);
183}
184
185static void lightTriangleFlatFog(ogles_context_t* c,
186        vertex_t* v0, vertex_t* v1, vertex_t* v2)
187{
188    lightVertexDarkFlatFog(c, v0);
189    lightVertexDarkFlatFog(c, v1);
190    lightVertexSmoothFog(c, v2);
191    // configure the rasterizer here, before we clip
192    c->rasterizer.procs.color4xv(c, v2->color.v);
193}
194
195
196
// Signature shared by all the lightTriangle* functions above: a context
// and the three vertices of the primitive.
typedef void (*light_primitive_t)(ogles_context_t*,
        vertex_t*, vertex_t*, vertex_t*);

// Dispatch table used by ogles_validate_primitives(). The index is a
// 3-bit mask, so the order of the entries must match these bit values:
// fog 0x4, light 0x2, smooth 0x1
static const light_primitive_t lightPrimitive[8] = {
    lightTriangleDarkFlat,          // no fog | dark  | flat
    lightTriangleDarkSmooth,        // no fog | dark  | smooth
    lightTriangleFlat,              // no fog | light | flat
    lightTriangleSmooth,            // no fog | light | smooth
    lightTriangleDarkFlatFog,       // fog    | dark  | flat
    lightTriangleDarkSmoothFog,     // fog    | dark  | smooth
    lightTriangleFlatFog,           // fog    | light | flat
    lightTriangleSmoothFog          // fog    | light | smooth
};
211
212void ogles_validate_primitives(ogles_context_t* c)
213{
214    const uint32_t enables = c->rasterizer.state.enables;
215
216    // set up the lighting/shading/smoothing/fogging function
217    int index = enables & GGL_ENABLE_SMOOTH ? 0x1 : 0;
218    index |= c->lighting.enable ? 0x2 : 0;
219    index |= enables & GGL_ENABLE_FOG ? 0x4 : 0;
220    c->lighting.lightTriangle = lightPrimitive[index];
221
222    // set up the primitive renderers
223    if (ggl_likely(c->arrays.vertex.enable)) {
224        c->prims.renderPoint    = primitive_point;
225        c->prims.renderLine     = primitive_line;
226        c->prims.renderTriangle = primitive_clip_triangle;
227    } else {
228        c->prims.renderPoint    = primitive_nop_point;
229        c->prims.renderLine     = primitive_nop_line;
230        c->prims.renderTriangle = primitive_nop_triangle;
231    }
232}
233
234// ----------------------------------------------------------------------------
235
236void compute_iterators_t::initTriangle(
237        vertex_t const* v0, vertex_t const* v1, vertex_t const* v2)
238{
239    m_dx01 = v1->window.x - v0->window.x;
240    m_dy10 = v0->window.y - v1->window.y;
241    m_dx20 = v0->window.x - v2->window.x;
242    m_dy02 = v2->window.y - v0->window.y;
243    m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
244    (void)m_reserved; // suppress unused warning
245}
246
247void compute_iterators_t::initLine(
248        vertex_t const* v0, vertex_t const* v1)
249{
250    m_dx01 = m_dy02 = v1->window.x - v0->window.x;
251    m_dy10 = m_dx20 = v0->window.y - v1->window.y;
252    m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
253}
254
255void compute_iterators_t::initLerp(vertex_t const* v0, uint32_t enables)
256{
257    m_x0 = v0->window.x;
258    m_y0 = v0->window.y;
259    const GGLcoord area = (m_area + TRI_HALF) >> TRI_FRACTION_BITS;
260    const GGLcoord minArea = 2; // cannot be inverted
261    // triangles with an area smaller than 1.0 are not smooth-shaded
262
263    int q=0, s=0, d=0;
264    if (abs(area) >= minArea) {
265        // Here we do some voodoo magic, to compute a suitable scale
266        // factor for deltas/area:
267
268        // First compute the 1/area with full 32-bits precision,
269        // gglRecipQNormalized returns a number [-0.5, 0.5[ and an exponent.
270        d = gglRecipQNormalized(area, &q);
271
272        // Then compute the minimum left-shift to not overflow the muls
273        // below.
274        s = 32 - gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
275
276        // We'll keep 16-bits of precision for deltas/area. So we need
277        // to shift everything left an extra 15 bits.
278        s += 15;
279
280        // make sure all final shifts are not > 32, because gglMulx
281        // can't handle it.
282        if (s < q) s = q;
283        if (s > 32) {
284            d >>= 32-s;
285            s = 32;
286        }
287    }
288
289    m_dx01 = gglMulx(m_dx01, d, s);
290    m_dy10 = gglMulx(m_dy10, d, s);
291    m_dx20 = gglMulx(m_dx20, d, s);
292    m_dy02 = gglMulx(m_dy02, d, s);
293    m_area_scale = 32 + q - s;
294    m_scale = 0;
295
296    if (enables & GGL_ENABLE_TMUS) {
297        const int A = gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
298        const int B = gglClz(abs(m_x0)|abs(m_y0));
299        m_scale = max(0, 32 - (A + 16)) +
300                  max(0, 32 - (B + TRI_FRACTION_BITS)) + 1;
301    }
302}
303
304int compute_iterators_t::iteratorsScale(GGLfixed* it,
305        int32_t c0, int32_t c1, int32_t c2) const
306{
307    int32_t dc01 = c1 - c0;
308    int32_t dc02 = c2 - c0;
309    const int A = gglClz(abs(c0));
310    const int B = gglClz(abs(dc01)|abs(dc02));
311    const int scale = min(A, B - m_scale) - 2;
312    if (scale >= 0) {
313        c0   <<= scale;
314        dc01 <<= scale;
315        dc02 <<= scale;
316    } else {
317        c0   >>= -scale;
318        dc01 >>= -scale;
319        dc02 >>= -scale;
320    }
321    const int s = m_area_scale;
322    int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
323    int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
324    int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
325            gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
326    it[0] = c;
327    it[1] = dcdx;
328    it[2] = dcdy;
329    return scale;
330}
331
332void compute_iterators_t::iterators1616(GGLfixed* it,
333        GGLfixed c0, GGLfixed c1, GGLfixed c2) const
334{
335    const GGLfixed dc01 = c1 - c0;
336    const GGLfixed dc02 = c2 - c0;
337    // 16.16 x 16.16 == 32.32 --> 16.16
338    const int s = m_area_scale;
339    int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
340    int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
341    int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
342            gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
343    it[0] = c;
344    it[1] = dcdx;
345    it[2] = dcdy;
346}
347
348void compute_iterators_t::iterators0032(int64_t* it,
349        int32_t c0, int32_t c1, int32_t c2) const
350{
351    const int s = m_area_scale - 16;
352    int32_t dc01 = (c1 - c0)>>s;
353    int32_t dc02 = (c2 - c0)>>s;
354    // 16.16 x 16.16 == 32.32
355    int64_t dcdx = gglMulii(dc01, m_dy02) + gglMulii(dc02, m_dy10);
356    int64_t dcdy = gglMulii(dc02, m_dx01) + gglMulii(dc01, m_dx20);
357    it[ 0] = (c0<<16) - ((dcdx*m_x0 + dcdy*m_y0)>>4);
358    it[ 1] = dcdx;
359    it[ 2] = dcdy;
360}
361
362#if defined(__arm__) && !defined(__thumb__)
363inline void compute_iterators_t::iterators0032(int32_t* it,
364        int32_t c0, int32_t c1, int32_t c2) const
365{
366    ::iterators0032(this, it, c0, c1, c2);
367}
368#else
369void compute_iterators_t::iterators0032(int32_t* it,
370        int32_t c0, int32_t c1, int32_t c2) const
371{
372    int64_t it64[3];
373    iterators0032(it64, c0, c1, c2);
374    it[0] = it64[0];
375    it[1] = it64[1];
376    it[2] = it64[2];
377}
378#endif
379
380// ----------------------------------------------------------------------------
381
382static inline int32_t clampZ(GLfixed z) CONST;
383int32_t clampZ(GLfixed z) {
384    z = (z & ~(z>>31));
385    if (z >= 0x10000)
386        z = 0xFFFF;
387    return z;
388}
389
390static __attribute__((noinline))
391void fetch_texcoord_impl(ogles_context_t* c,
392        vertex_t* v0, vertex_t* v1, vertex_t* v2)
393{
394    vertex_t* const vtx[3] = { v0, v1, v2 };
395    array_t const * const texcoordArray = c->arrays.texture;
396
397    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
398        if (!(c->rasterizer.state.texture[i].enable))
399            continue;
400
401        for (int j=0 ; j<3 ; j++) {
402            vertex_t* const v = vtx[j];
403            if (v->flags & vertex_t::TT)
404                continue;
405
406            // NOTE: here we could compute automatic texgen
407            // such as sphere/cube maps, instead of fetching them
408            // from the textcoord array.
409
410            vec4_t& coords = v->texture[i];
411            const GLubyte* tp = texcoordArray[i].element(
412                    v->index & vertex_cache_t::INDEX_MASK);
413            texcoordArray[i].fetch(c, coords.v, tp);
414
415            // transform texture coordinates...
416            coords.Q = 0x10000;
417            const transform_t& tr = c->transforms.texture[i].transform;
418            if (ggl_unlikely(tr.ops)) {
419                c->arrays.tex_transform[i](&tr, &coords, &coords);
420            }
421
422            // divide by Q
423            const GGLfixed q = coords.Q;
424            if (ggl_unlikely(q != 0x10000)) {
425                const int32_t qinv = gglRecip28(q);
426                coords.S = gglMulx(coords.S, qinv, 28);
427                coords.T = gglMulx(coords.T, qinv, 28);
428            }
429        }
430    }
431    v0->flags |= vertex_t::TT;
432    v1->flags |= vertex_t::TT;
433    v2->flags |= vertex_t::TT;
434}
435
436inline void fetch_texcoord(ogles_context_t* c,
437        vertex_t* v0, vertex_t* v1, vertex_t* v2)
438{
439    const uint32_t enables = c->rasterizer.state.enables;
440    if (!(enables & GGL_ENABLE_TMUS))
441        return;
442
443    // Fetch & transform texture coordinates...
444    if (ggl_likely(v0->flags & v1->flags & v2->flags & vertex_t::TT)) {
445        // already done for all three vertices, bail...
446        return;
447    }
448    fetch_texcoord_impl(c, v0, v1, v2);
449}
450
451// ----------------------------------------------------------------------------
452#if 0
453#pragma mark -
454#pragma mark Point
455#endif
456
457void primitive_nop_point(ogles_context_t*, vertex_t*) {
458}
459
460void primitive_point(ogles_context_t* c, vertex_t* v)
461{
462    // lighting & clamping...
463    const uint32_t enables = c->rasterizer.state.enables;
464
465    if (ggl_unlikely(!(v->flags & vertex_t::LIT))) {
466        if (c->lighting.enable) {
467            c->lighting.lightVertex(c, v);
468        } else {
469            v->flags |= vertex_t::LIT;
470            const GLvoid* cp = c->arrays.color.element(
471                    v->index & vertex_cache_t::INDEX_MASK);
472            c->arrays.color.fetch(c, v->color.v, cp);
473        }
474        if (enables & GGL_ENABLE_FOG) {
475            v->fog = c->fog.fog(c, v->eye.z);
476        }
477    }
478
479    // XXX: we don't need to do that each-time
480    // if color array and lighting not enabled
481    c->rasterizer.procs.color4xv(c, v->color.v);
482
483    // XXX: look into ES point-sprite extension
484    if (enables & GGL_ENABLE_TMUS) {
485        fetch_texcoord(c, v,v,v);
486        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
487            if (!c->rasterizer.state.texture[i].enable)
488                continue;
489            int32_t itt[8];
490            itt[1] = itt[2] = itt[4] = itt[5] = 0;
491            itt[6] = itt[7] = 16; // XXX: check that
492            if (c->rasterizer.state.texture[i].s_wrap == GGL_CLAMP) {
493                int width = c->textures.tmu[i].texture->surface.width;
494                itt[0] = v->texture[i].S * width;
495                itt[6] = 0;
496            }
497            if (c->rasterizer.state.texture[i].t_wrap == GGL_CLAMP) {
498                int height = c->textures.tmu[i].texture->surface.height;
499                itt[3] = v->texture[i].T * height;
500                itt[7] = 0;
501            }
502            c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
503        }
504    }
505
506    if (enables & GGL_ENABLE_DEPTH_TEST) {
507        int32_t itz[3];
508        itz[0] = clampZ(v->window.z) * 0x00010001;
509        itz[1] = itz[2] = 0;
510        c->rasterizer.procs.zGrad3xv(c, itz);
511    }
512
513    if (enables & GGL_ENABLE_FOG) {
514        GLfixed itf[3];
515        itf[0] = v->fog;
516        itf[1] = itf[2] = 0;
517        c->rasterizer.procs.fogGrad3xv(c, itf);
518    }
519
520    // Render our point...
521    c->rasterizer.procs.pointx(c, v->window.v, c->point.size);
522}
523
524// ----------------------------------------------------------------------------
525#if 0
526#pragma mark -
527#pragma mark Line
528#endif
529
530void primitive_nop_line(ogles_context_t*, vertex_t*, vertex_t*) {
531}
532
533void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1)
534{
535    // get texture coordinates
536    fetch_texcoord(c, v0, v1, v1);
537
538    // light/shade the vertices first (they're copied below)
539    c->lighting.lightTriangle(c, v0, v1, v1);
540
541    // clip the line if needed
542    if (ggl_unlikely((v0->flags | v1->flags) & vertex_t::CLIP_ALL)) {
543        unsigned int count = clip_line(c, v0, v1);
544        if (ggl_unlikely(count == 0))
545            return;
546    }
547
548    // compute iterators...
549    const uint32_t enables = c->rasterizer.state.enables;
550    const uint32_t mask =   GGL_ENABLE_TMUS |
551                            GGL_ENABLE_SMOOTH |
552                            GGL_ENABLE_W |
553                            GGL_ENABLE_FOG |
554                            GGL_ENABLE_DEPTH_TEST;
555
556    if (ggl_unlikely(enables & mask)) {
557        c->lerp.initLine(v0, v1);
558        lerp_triangle(c, v0, v1, v0);
559    }
560
561    // render our line
562    c->rasterizer.procs.linex(c, v0->window.v, v1->window.v, c->line.width);
563}
564
565// ----------------------------------------------------------------------------
566#if 0
567#pragma mark -
568#pragma mark Triangle
569#endif
570
571void primitive_nop_triangle(ogles_context_t* /*c*/,
572        vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/) {
573}
574
575void primitive_clip_triangle(ogles_context_t* c,
576        vertex_t* v0, vertex_t* v1, vertex_t* v2)
577{
578    uint32_t cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
579    if (ggl_likely(!cc)) {
580        // code below must be as optimized as possible, this is the
581        // common code path.
582
583        // This triangle is not clipped, test if it's culled
584        // unclipped triangle...
585        c->lerp.initTriangle(v0, v1, v2);
586        if (cull_triangle(c, v0, v1, v2))
587            return; // culled!
588
589        // Fetch all texture coordinates if needed
590        fetch_texcoord(c, v0, v1, v2);
591
592        // light (or shade) our triangle!
593        c->lighting.lightTriangle(c, v0, v1, v2);
594
595        triangle(c, v0, v1, v2);
596        return;
597    }
598
599    // The assumption here is that we're not going to clip very often,
600    // and even more rarely will we clip a triangle that ends up
601    // being culled out. So it's okay to light the vertices here, even though
602    // in a few cases we won't render the triangle (if culled).
603
604    // Fetch texture coordinates...
605    fetch_texcoord(c, v0, v1, v2);
606
607    // light (or shade) our triangle!
608    c->lighting.lightTriangle(c, v0, v1, v2);
609
610    clip_triangle(c, v0, v1, v2);
611}
612
613// -----------------------------------------------------------------------
614
615void triangle(ogles_context_t* c,
616        vertex_t* v0, vertex_t* v1, vertex_t* v2)
617{
618    // compute iterators...
619    const uint32_t enables = c->rasterizer.state.enables;
620    const uint32_t mask =   GGL_ENABLE_TMUS |
621                            GGL_ENABLE_SMOOTH |
622                            GGL_ENABLE_W |
623                            GGL_ENABLE_FOG |
624                            GGL_ENABLE_DEPTH_TEST;
625
626    if (ggl_likely(enables & mask))
627        lerp_triangle(c, v0, v1, v2);
628
629    c->rasterizer.procs.trianglex(c, v0->window.v, v1->window.v, v2->window.v);
630}
631
632void lerp_triangle(ogles_context_t* c,
633        vertex_t* v0, vertex_t* v1, vertex_t* v2)
634{
635    const uint32_t enables = c->rasterizer.state.enables;
636    c->lerp.initLerp(v0, enables);
637
638    // set up texture iterators
639    if (enables & GGL_ENABLE_TMUS) {
640        if (enables & GGL_ENABLE_W) {
641            lerp_texcoords_w(c, v0, v1, v2);
642        } else {
643            lerp_texcoords(c, v0, v1, v2);
644        }
645    }
646
647    // set up the color iterators
648    const compute_iterators_t& lerp = c->lerp;
649    if (enables & GGL_ENABLE_SMOOTH) {
650        GLfixed itc[12];
651        for (int i=0 ; i<4 ; i++) {
652            const GGLcolor c0 = v0->color.v[i] * 255;
653            const GGLcolor c1 = v1->color.v[i] * 255;
654            const GGLcolor c2 = v2->color.v[i] * 255;
655            lerp.iterators1616(&itc[i*3], c0, c1, c2);
656        }
657        c->rasterizer.procs.colorGrad12xv(c, itc);
658    }
659
660    if (enables & GGL_ENABLE_DEPTH_TEST) {
661        int32_t itz[3];
662        const int32_t v0z = clampZ(v0->window.z);
663        const int32_t v1z = clampZ(v1->window.z);
664        const int32_t v2z = clampZ(v2->window.z);
665        if (ggl_unlikely(c->polygonOffset.enable)) {
666            const int32_t units = (c->polygonOffset.units << 16);
667            const GLfixed factor = c->polygonOffset.factor;
668            if (factor) {
669                int64_t itz64[3];
670                lerp.iterators0032(itz64, v0z, v1z, v2z);
671                int64_t maxDepthSlope = max(itz64[1], itz64[2]);
672                itz[0] = uint32_t(itz64[0])
673                        + uint32_t((maxDepthSlope*factor)>>16) + units;
674                itz[1] = uint32_t(itz64[1]);
675                itz[2] = uint32_t(itz64[2]);
676            } else {
677                lerp.iterators0032(itz, v0z, v1z, v2z);
678                itz[0] += units;
679            }
680        } else {
681            lerp.iterators0032(itz, v0z, v1z, v2z);
682        }
683        c->rasterizer.procs.zGrad3xv(c, itz);
684    }
685
686    if (ggl_unlikely(enables & GGL_ENABLE_FOG)) {
687        GLfixed itf[3];
688        lerp.iterators1616(itf, v0->fog, v1->fog, v2->fog);
689        c->rasterizer.procs.fogGrad3xv(c, itf);
690    }
691}
692
693
694static inline
695int compute_lod(ogles_context_t* c, int i,
696        int32_t s0, int32_t t0, int32_t s1, int32_t t1, int32_t s2, int32_t t2)
697{
698    // Compute mipmap level / primitive
699    // rho = sqrt( texelArea / area )
700    // lod = log2( rho )
701    // lod = log2( texelArea / area ) / 2
702    // lod = (log2( texelArea ) - log2( area )) / 2
703    const compute_iterators_t& lerp = c->lerp;
704    const GGLcoord area = abs(lerp.area());
705    const int w = c->textures.tmu[i].texture->surface.width;
706    const int h = c->textures.tmu[i].texture->surface.height;
707    const int shift = 16 + (16 - TRI_FRACTION_BITS);
708    int32_t texelArea = abs( gglMulx(s1-s0, t2-t0, shift) -
709            gglMulx(s2-s0, t1-t0, shift) )*w*h;
710    int log2TArea = (32-TRI_FRACTION_BITS  -1) - gglClz(texelArea);
711    int log2Area  = (32-TRI_FRACTION_BITS*2-1) - gglClz(area);
712    int lod = (log2TArea - log2Area + 1) >> 1;
713    return lod;
714}
715
716void lerp_texcoords(ogles_context_t* c,
717        vertex_t* v0, vertex_t* v1, vertex_t* v2)
718{
719    const compute_iterators_t& lerp = c->lerp;
720    int32_t itt[8] __attribute__((aligned(16)));
721    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
722        const texture_t& tmu = c->rasterizer.state.texture[i];
723        if (!tmu.enable)
724            continue;
725
726        // compute the jacobians using block floating-point
727        int32_t s0 = v0->texture[i].S;
728        int32_t t0 = v0->texture[i].T;
729        int32_t s1 = v1->texture[i].S;
730        int32_t t1 = v1->texture[i].T;
731        int32_t s2 = v2->texture[i].S;
732        int32_t t2 = v2->texture[i].T;
733
734        const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
735        if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
736            int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
737            c->rasterizer.procs.bindTextureLod(c, i,
738                    &c->textures.tmu[i].texture->mip(lod));
739        }
740
741        // premultiply (s,t) when clampling
742        if (tmu.s_wrap == GGL_CLAMP) {
743            const int width = tmu.surface.width;
744            s0 *= width;
745            s1 *= width;
746            s2 *= width;
747        }
748        if (tmu.t_wrap == GGL_CLAMP) {
749            const int height = tmu.surface.height;
750            t0 *= height;
751            t1 *= height;
752            t2 *= height;
753        }
754        itt[6] = -lerp.iteratorsScale(itt+0, s0, s1, s2);
755        itt[7] = -lerp.iteratorsScale(itt+3, t0, t1, t2);
756        c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
757    }
758}
759
760void lerp_texcoords_w(ogles_context_t* c,
761        vertex_t* v0, vertex_t* v1, vertex_t* v2)
762{
763    const compute_iterators_t& lerp = c->lerp;
764    int32_t itt[8] __attribute__((aligned(16)));
765    int32_t itw[3];
766
767    // compute W's scale to 2.30
768    int32_t w0 = v0->window.w;
769    int32_t w1 = v1->window.w;
770    int32_t w2 = v2->window.w;
771    int wscale = 32 - gglClz(w0|w1|w2);
772
773    // compute the jacobian using block floating-point
774    int sc = lerp.iteratorsScale(itw, w0, w1, w2);
775    sc +=  wscale - 16;
776    c->rasterizer.procs.wGrad3xv(c, itw);
777
778    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
779        const texture_t& tmu = c->rasterizer.state.texture[i];
780        if (!tmu.enable)
781            continue;
782
783        // compute the jacobians using block floating-point
784        int32_t s0 = v0->texture[i].S;
785        int32_t t0 = v0->texture[i].T;
786        int32_t s1 = v1->texture[i].S;
787        int32_t t1 = v1->texture[i].T;
788        int32_t s2 = v2->texture[i].S;
789        int32_t t2 = v2->texture[i].T;
790
791        const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
792        if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
793            int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
794            c->rasterizer.procs.bindTextureLod(c, i,
795                    &c->textures.tmu[i].texture->mip(lod));
796        }
797
798        // premultiply (s,t) when clampling
799        if (tmu.s_wrap == GGL_CLAMP) {
800            const int width = tmu.surface.width;
801            s0 *= width;
802            s1 *= width;
803            s2 *= width;
804        }
805        if (tmu.t_wrap == GGL_CLAMP) {
806            const int height = tmu.surface.height;
807            t0 *= height;
808            t1 *= height;
809            t2 *= height;
810        }
811
812        s0 = gglMulx(s0, w0, wscale);
813        t0 = gglMulx(t0, w0, wscale);
814        s1 = gglMulx(s1, w1, wscale);
815        t1 = gglMulx(t1, w1, wscale);
816        s2 = gglMulx(s2, w2, wscale);
817        t2 = gglMulx(t2, w2, wscale);
818
819        itt[6] = sc - lerp.iteratorsScale(itt+0, s0, s1, s2);
820        itt[7] = sc - lerp.iteratorsScale(itt+3, t0, t1, t2);
821        c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
822    }
823}
824
825
826static inline
827bool cull_triangle(ogles_context_t* c, vertex_t* /*v0*/, vertex_t* /*v1*/, vertex_t* /*v2*/)
828{
829    if (ggl_likely(c->cull.enable)) {
830        const GLenum winding = (c->lerp.area() > 0) ? GL_CW : GL_CCW;
831        const GLenum face = (winding == c->cull.frontFace) ? GL_FRONT : GL_BACK;
832        if (face == c->cull.cullFace)
833            return true; // culled!
834    }
835    return false;
836}
837
838static inline
839GLfixed frustumPlaneDist(int plane, const vec4_t& s)
840{
841    const GLfixed d = s.v[ plane >> 1 ];
842    return  ((plane & 1) ? (s.w - d) : (s.w + d));
843}
844
845static inline
846int32_t clipDivide(GLfixed a, GLfixed b) {
847    // returns a 4.28 fixed-point
848    return gglMulDivi(1LU<<28, a, b);
849}
850
851void clip_triangle(ogles_context_t* c,
852        vertex_t* v0, vertex_t* v1, vertex_t* v2)
853{
854    uint32_t all_cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
855
856    vertex_t *p0, *p1, *p2;
857    const int MAX_CLIPPING_PLANES = 6 + OGLES_MAX_CLIP_PLANES;
858    const int MAX_VERTICES = 3;
859
860    // Temporary buffer to hold the new vertices. Each plane can add up to
861    // two new vertices (because the polygon is convex).
862    // We need one extra element, to handle an overflow case when
863    // the polygon degenerates into something non convex.
864    vertex_t buffer[MAX_CLIPPING_PLANES * 2 + 1];   // ~3KB
865    vertex_t* buf = buffer;
866
867    // original list of vertices (polygon to clip, in fact this
868    // function works with an arbitrary polygon).
869    vertex_t* in[3] = { v0, v1, v2 };
870
871    // output lists (we need 2, which we use back and forth)
    // (the maximum size of an output list is MAX_CLIPPING_PLANES + MAX_VERTICES)
    // plus 2 more elements for overflow with non-convex polygons.
874    vertex_t* out[2][MAX_CLIPPING_PLANES + MAX_VERTICES + 2];
875    unsigned int outi = 0;
876
877    // current input list
878    vertex_t** ivl = in;
879
880    // 3 input vertices, 0 in the output list, first plane
881    unsigned int ic = 3;
882
    // User clip-planes first. This clipping is always done in eye
    // coordinates; it is basically the same algorithm as for the view-volume
    // clipping, except for the computation of the distance (vertex, plane)
    // and the fact that we need to compute the eye-coordinates of each
    // new vertex we create.
888
889    if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
890    {
891        unsigned int plane = 0;
892        uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
893        do {
894            if (cc & 1) {
895                // pointers to our output list (head and current)
896                vertex_t** const ovl = &out[outi][0];
897                vertex_t** output = ovl;
898                unsigned int oc = 0;
899                unsigned int sentinel = 0;
900                // previous vertex, compute distance to the plane
901                vertex_t* s = ivl[ic-1];
902                const vec4_t& equation = c->clipPlanes.plane[plane].equation;
903                GLfixed sd = dot4(equation.v, s->eye.v);
904                // clip each vertex against this plane...
905                for (unsigned int i=0 ; i<ic ; i++) {
906                    vertex_t* p = ivl[i];
907                    const GLfixed pd = dot4(equation.v, p->eye.v);
908                    if (sd >= 0) {
909                        if (pd >= 0) {
910                            // both inside
911                            *output++ = p;
912                            oc++;
913                        } else {
914                            // s inside, p outside (exiting)
915                            const GLfixed t = clipDivide(sd, sd-pd);
916                            c->arrays.clipEye(c, buf, t, p, s);
917                            *output++ = buf++;
918                            oc++;
919                            if (++sentinel >= 3)
920                                return; // non-convex polygon!
921                        }
922                    } else {
923                        if (pd >= 0) {
924                            // s outside (entering)
925                            if (pd) {
926                                const GLfixed t = clipDivide(pd, pd-sd);
927                                c->arrays.clipEye(c, buf, t, s, p);
928                                *output++ = buf++;
929                                oc++;
930                                if (++sentinel >= 3)
931                                    return; // non-convex polygon!
932                            }
933                            *output++ = p;
934                            oc++;
935                        } else {
936                           // both outside
937                        }
938                    }
939                    s = p;
940                    sd = pd;
941                }
942                // output list become the new input list
943                if (oc<3)
944                    return; // less than 3 vertices left? we're done!
945                ivl = ovl;
946                ic = oc;
947                outi = 1-outi;
948            }
949            cc >>= 1;
950            plane++;
951        } while (cc);
952    }
953
954    // frustum clip-planes
955    if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
956    {
957        unsigned int plane = 0;
958        uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
959        do {
960            if (cc & 1) {
961                // pointers to our output list (head and current)
962                vertex_t** const ovl = &out[outi][0];
963                vertex_t** output = ovl;
964                unsigned int oc = 0;
965                unsigned int sentinel = 0;
966                // previous vertex, compute distance to the plane
967                vertex_t* s = ivl[ic-1];
968                GLfixed sd = frustumPlaneDist(plane, s->clip);
969                // clip each vertex against this plane...
970                for (unsigned int i=0 ; i<ic ; i++) {
971                    vertex_t* p = ivl[i];
972                    const GLfixed pd = frustumPlaneDist(plane, p->clip);
973                    if (sd >= 0) {
974                        if (pd >= 0) {
975                            // both inside
976                            *output++ = p;
977                            oc++;
978                        } else {
979                            // s inside, p outside (exiting)
980                            const GLfixed t = clipDivide(sd, sd-pd);
981                            c->arrays.clipVertex(c, buf, t, p, s);
982                            *output++ = buf++;
983                            oc++;
984                            if (++sentinel >= 3)
985                                return; // non-convex polygon!
986                        }
987                    } else {
988                        if (pd >= 0) {
989                            // s outside (entering)
990                            if (pd) {
991                                const GLfixed t = clipDivide(pd, pd-sd);
992                                c->arrays.clipVertex(c, buf, t, s, p);
993                                *output++ = buf++;
994                                oc++;
995                                if (++sentinel >= 3)
996                                    return; // non-convex polygon!
997                            }
998                            *output++ = p;
999                            oc++;
1000                        } else {
1001                           // both outside
1002                        }
1003                    }
1004                    s = p;
1005                    sd = pd;
1006                }
1007                // output list become the new input list
1008                if (oc<3)
1009                    return; // less than 3 vertices left? we're done!
1010                ivl = ovl;
1011                ic = oc;
1012                outi = 1-outi;
1013            }
1014            cc >>= 1;
1015            plane++;
1016        } while (cc);
1017    }
1018
1019    // finally we can render our triangles...
1020    p0 = ivl[0];
1021    p1 = ivl[1];
1022    for (unsigned int i=2 ; i<ic ; i++) {
1023        p2 = ivl[i];
1024        c->lerp.initTriangle(p0, p1, p2);
1025        if (cull_triangle(c, p0, p1, p2)) {
1026            p1 = p2;
1027            continue; // culled!
1028        }
1029        triangle(c, p0, p1, p2);
1030        p1 = p2;
1031    }
1032}
1033
1034unsigned int clip_line(ogles_context_t* c, vertex_t* s, vertex_t* p)
1035{
1036    const uint32_t all_cc = (s->flags | p->flags) & vertex_t::CLIP_ALL;
1037
1038    if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
1039    {
1040        unsigned int plane = 0;
1041        uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
1042        do {
1043            if (cc & 1) {
1044                const vec4_t& equation = c->clipPlanes.plane[plane].equation;
1045                const GLfixed sd = dot4(equation.v, s->eye.v);
1046                const GLfixed pd = dot4(equation.v, p->eye.v);
1047                if (sd >= 0) {
1048                    if (pd >= 0) {
1049                        // both inside
1050                    } else {
1051                        // s inside, p outside (exiting)
1052                        const GLfixed t = clipDivide(sd, sd-pd);
1053                        c->arrays.clipEye(c, p, t, p, s);
1054                    }
1055                } else {
1056                    if (pd >= 0) {
1057                        // s outside (entering)
1058                        if (pd) {
1059                            const GLfixed t = clipDivide(pd, pd-sd);
1060                            c->arrays.clipEye(c, s, t, s, p);
1061                        }
1062                    } else {
1063                       // both outside
1064                       return 0;
1065                    }
1066                }
1067            }
1068            cc >>= 1;
1069            plane++;
1070        } while (cc);
1071    }
1072
1073    // frustum clip-planes
1074    if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
1075    {
1076        unsigned int plane = 0;
1077        uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
1078        do {
1079            if (cc & 1) {
1080                const GLfixed sd = frustumPlaneDist(plane, s->clip);
1081                const GLfixed pd = frustumPlaneDist(plane, p->clip);
1082                if (sd >= 0) {
1083                    if (pd >= 0) {
1084                        // both inside
1085                    } else {
1086                        // s inside, p outside (exiting)
1087                        const GLfixed t = clipDivide(sd, sd-pd);
1088                        c->arrays.clipVertex(c, p, t, p, s);
1089                    }
1090                } else {
1091                    if (pd >= 0) {
1092                        // s outside (entering)
1093                        if (pd) {
1094                            const GLfixed t = clipDivide(pd, pd-sd);
1095                            c->arrays.clipVertex(c, s, t, s, p);
1096                        }
1097                    } else {
1098                       // both outside
1099                       return 0;
1100                    }
1101                }
1102            }
1103            cc >>= 1;
1104            plane++;
1105        } while (cc);
1106    }
1107
1108    return 2;
1109}
1110
1111
1112}; // namespace android
1113