primitives.cpp revision 7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407
1/* libs/opengles/primitives.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <math.h>
21
22#include "context.h"
23#include "primitives.h"
24#include "light.h"
25#include "matrix.h"
26#include "vertex.h"
27#include "fp.h"
28#include "TextureObjectManager.h"
29
// C-linkage routine defined outside this file. On ARM (non-Thumb) builds
// compute_iterators_t::iterators0032() forwards to it, passing the
// compute_iterators_t object as `that`.
extern "C" void iterators0032(
        const void* that,
        int32_t* it,
        int32_t c0, int32_t c1, int32_t c2);
32
33namespace android {
34
35// ----------------------------------------------------------------------------
36
37static void primitive_point(ogles_context_t* c, vertex_t* v);
38static void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
39static void primitive_clip_triangle(ogles_context_t* c,
40        vertex_t* v0, vertex_t* v1, vertex_t* v2);
41
42static void primitive_nop_point(ogles_context_t* c, vertex_t* v);
43static void primitive_nop_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1);
44static void primitive_nop_triangle(ogles_context_t* c,
45        vertex_t* v0, vertex_t* v1, vertex_t* v2);
46
47static inline bool cull_triangle(ogles_context_t* c,
48        vertex_t* v0, vertex_t* v1, vertex_t* v2);
49
50static void lerp_triangle(ogles_context_t* c,
51        vertex_t* v0, vertex_t* v1, vertex_t* v2);
52
53static void lerp_texcoords(ogles_context_t* c,
54        vertex_t* v0, vertex_t* v1, vertex_t* v2);
55
56static void lerp_texcoords_w(ogles_context_t* c,
57        vertex_t* v0, vertex_t* v1, vertex_t* v2);
58
59static void triangle(ogles_context_t* c,
60        vertex_t* v0, vertex_t* v1, vertex_t* v2);
61
62static void clip_triangle(ogles_context_t* c,
63        vertex_t* v0, vertex_t* v1, vertex_t* v2);
64
65static unsigned int clip_line(ogles_context_t* c,
66        vertex_t* s, vertex_t* p);
67
68// ----------------------------------------------------------------------------
69#if 0
70#pragma mark -
71#endif
72
73static void lightTriangleDarkSmooth(ogles_context_t* c,
74        vertex_t* v0, vertex_t* v1, vertex_t* v2)
75{
76    if (!(v0->flags & vertex_t::LIT)) {
77        v0->flags |= vertex_t::LIT;
78        const GLvoid* cp = c->arrays.color.element(
79                v0->index & vertex_cache_t::INDEX_MASK);
80        c->arrays.color.fetch(c, v0->color.v, cp);
81    }
82    if (!(v1->flags & vertex_t::LIT)) {
83        v1->flags |= vertex_t::LIT;
84        const GLvoid* cp = c->arrays.color.element(
85                v1->index & vertex_cache_t::INDEX_MASK);
86        c->arrays.color.fetch(c, v1->color.v, cp);
87    }
88    if(!(v2->flags & vertex_t::LIT)) {
89        v2->flags |= vertex_t::LIT;
90        const GLvoid* cp = c->arrays.color.element(
91                v2->index & vertex_cache_t::INDEX_MASK);
92        c->arrays.color.fetch(c, v2->color.v, cp);
93    }
94}
95
96static void lightTriangleDarkFlat(ogles_context_t* c,
97        vertex_t* v0, vertex_t* v1, vertex_t* v2)
98{
99    if (!(v2->flags & vertex_t::LIT)) {
100        v2->flags |= vertex_t::LIT;
101        const GLvoid* cp = c->arrays.color.element(
102                v2->index & vertex_cache_t::INDEX_MASK);
103        c->arrays.color.fetch(c, v2->color.v, cp);
104    }
105    // configure the rasterizer here, before we clip
106    c->rasterizer.procs.color4xv(c, v2->color.v);
107}
108
109static void lightTriangleSmooth(ogles_context_t* c,
110        vertex_t* v0, vertex_t* v1, vertex_t* v2)
111{
112    if (!(v0->flags & vertex_t::LIT))
113        c->lighting.lightVertex(c, v0);
114    if (!(v1->flags & vertex_t::LIT))
115        c->lighting.lightVertex(c, v1);
116    if(!(v2->flags & vertex_t::LIT))
117        c->lighting.lightVertex(c, v2);
118}
119
120static void lightTriangleFlat(ogles_context_t* c,
121        vertex_t* v0, vertex_t* v1, vertex_t* v2)
122{
123    if (!(v2->flags & vertex_t::LIT))
124        c->lighting.lightVertex(c, v2);
125    // configure the rasterizer here, before we clip
126    c->rasterizer.procs.color4xv(c, v2->color.v);
127}
128
129// The fog versions...
130
131static inline
132void lightVertexDarkSmoothFog(ogles_context_t* c, vertex_t* v)
133{
134    if (!(v->flags & vertex_t::LIT)) {
135        v->flags |= vertex_t::LIT;
136        v->fog = c->fog.fog(c, v->window.z);
137        const GLvoid* cp = c->arrays.color.element(
138                v->index & vertex_cache_t::INDEX_MASK);
139        c->arrays.color.fetch(c, v->color.v, cp);
140    }
141}
142static inline
143void lightVertexDarkFlatFog(ogles_context_t* c, vertex_t* v)
144{
145    if (!(v->flags & vertex_t::LIT)) {
146        v->flags |= vertex_t::LIT;
147        v->fog = c->fog.fog(c, v->window.z);
148    }
149}
150static inline
151void lightVertexSmoothFog(ogles_context_t* c, vertex_t* v)
152{
153    if (!(v->flags & vertex_t::LIT)) {
154        v->fog = c->fog.fog(c, v->window.z);
155        c->lighting.lightVertex(c, v);
156    }
157}
158
159static void lightTriangleDarkSmoothFog(ogles_context_t* c,
160        vertex_t* v0, vertex_t* v1, vertex_t* v2)
161{
162    lightVertexDarkSmoothFog(c, v0);
163    lightVertexDarkSmoothFog(c, v1);
164    lightVertexDarkSmoothFog(c, v2);
165}
166
167static void lightTriangleDarkFlatFog(ogles_context_t* c,
168        vertex_t* v0, vertex_t* v1, vertex_t* v2)
169{
170    lightVertexDarkFlatFog(c, v0);
171    lightVertexDarkFlatFog(c, v1);
172    lightVertexDarkSmoothFog(c, v2);
173    // configure the rasterizer here, before we clip
174    c->rasterizer.procs.color4xv(c, v2->color.v);
175}
176
177static void lightTriangleSmoothFog(ogles_context_t* c,
178        vertex_t* v0, vertex_t* v1, vertex_t* v2)
179{
180    lightVertexSmoothFog(c, v0);
181    lightVertexSmoothFog(c, v1);
182    lightVertexSmoothFog(c, v2);
183}
184
185static void lightTriangleFlatFog(ogles_context_t* c,
186        vertex_t* v0, vertex_t* v1, vertex_t* v2)
187{
188    lightVertexDarkFlatFog(c, v0);
189    lightVertexDarkFlatFog(c, v1);
190    lightVertexSmoothFog(c, v2);
191    // configure the rasterizer here, before we clip
192    c->rasterizer.procs.color4xv(c, v2->color.v);
193}
194
195
196
197typedef void (*light_primitive_t)(ogles_context_t*,
198        vertex_t*, vertex_t*, vertex_t*);
199
200// fog 0x4, light 0x2, smooth 0x1
201static const light_primitive_t lightPrimitive[8] = {
202    lightTriangleDarkFlat,          // no fog | dark  | flat
203    lightTriangleDarkSmooth,        // no fog | dark  | smooth
204    lightTriangleFlat,              // no fog | light | flat
205    lightTriangleSmooth,            // no fog | light | smooth
206    lightTriangleDarkFlatFog,       // fog    | dark  | flat
207    lightTriangleDarkSmoothFog,     // fog    | dark  | smooth
208    lightTriangleFlatFog,           // fog    | light | flat
209    lightTriangleSmoothFog          // fog    | light | smooth
210};
211
212void ogles_validate_primitives(ogles_context_t* c)
213{
214    const uint32_t enables = c->rasterizer.state.enables;
215
216    // set up the lighting/shading/smoothing/fogging function
217    int index = enables & GGL_ENABLE_SMOOTH ? 0x1 : 0;
218    index |= c->lighting.enable ? 0x2 : 0;
219    index |= enables & GGL_ENABLE_FOG ? 0x4 : 0;
220    c->lighting.lightTriangle = lightPrimitive[index];
221
222    // set up the primitive renderers
223    if (ggl_likely(c->arrays.vertex.enable)) {
224        c->prims.renderPoint    = primitive_point;
225        c->prims.renderLine     = primitive_line;
226        c->prims.renderTriangle = primitive_clip_triangle;
227    } else {
228        c->prims.renderPoint    = primitive_nop_point;
229        c->prims.renderLine     = primitive_nop_line;
230        c->prims.renderTriangle = primitive_nop_triangle;
231    }
232}
233
234// ----------------------------------------------------------------------------
235
236void compute_iterators_t::initTriangle(
237        vertex_t const* v0, vertex_t const* v1, vertex_t const* v2)
238{
239    m_dx01 = v1->window.x - v0->window.x;
240    m_dy10 = v0->window.y - v1->window.y;
241    m_dx20 = v0->window.x - v2->window.x;
242    m_dy02 = v2->window.y - v0->window.y;
243    m_area = m_dx01*m_dy02 + (-m_dy10)*m_dx20;
244}
245
246void compute_iterators_t::initLerp(vertex_t const* v0, uint32_t enables)
247{
248    m_x0 = v0->window.x;
249    m_y0 = v0->window.y;
250    const GGLcoord area = (m_area + TRI_HALF) >> TRI_FRACTION_BITS;
251    const GGLcoord minArea = 2; // cannot be inversed
252    // triangles with an area smaller than 1.0 are not smooth-shaded
253
254    int q=0, s=0, d=0;
255    if (abs(area) >= minArea) {
256        // Here we do some voodoo magic, to compute a suitable scale
257        // factor for deltas/area:
258
259        // First compute the 1/area with full 32-bits precision,
260        // gglRecipQNormalized returns a number [-0.5, 0.5[ and an exponent.
261        d = gglRecipQNormalized(area, &q);
262
263        // Then compute the minimum left-shift to not overflow the muls
264        // below.
265        s = 32 - gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
266
267        // We'll keep 16-bits of precision for deltas/area. So we need
268        // to shift everything left an extra 15 bits.
269        s += 15;
270
271        // make sure all final shifts are not > 32, because gglMulx
272        // can't handle it.
273        if (s < q) s = q;
274        if (s > 32) {
275            d >>= 32-s;
276            s = 32;
277        }
278    }
279
280    m_dx01 = gglMulx(m_dx01, d, s);
281    m_dy10 = gglMulx(m_dy10, d, s);
282    m_dx20 = gglMulx(m_dx20, d, s);
283    m_dy02 = gglMulx(m_dy02, d, s);
284    m_area_scale = 32 + q - s;
285    m_scale = 0;
286
287    if (enables & GGL_ENABLE_TMUS) {
288        const int A = gglClz(abs(m_dy02)|abs(m_dy10)|abs(m_dx01)|abs(m_dx20));
289        const int B = gglClz(abs(m_x0)|abs(m_y0));
290        m_scale = max(0, 32 - (A + 16)) +
291                  max(0, 32 - (B + TRI_FRACTION_BITS)) + 1;
292    }
293}
294
295int compute_iterators_t::iteratorsScale(GGLfixed* it,
296        int32_t c0, int32_t c1, int32_t c2) const
297{
298    int32_t dc01 = c1 - c0;
299    int32_t dc02 = c2 - c0;
300    const int A = gglClz(abs(c0));
301    const int B = gglClz(abs(dc01)|abs(dc02));
302    const int scale = min(A, B - m_scale) - 2;
303    if (scale >= 0) {
304        c0   <<= scale;
305        dc01 <<= scale;
306        dc02 <<= scale;
307    } else {
308        c0   >>= -scale;
309        dc01 >>= -scale;
310        dc02 >>= -scale;
311    }
312    const int s = m_area_scale;
313    int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
314    int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
315    int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
316            gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
317    it[0] = c;
318    it[1] = dcdx;
319    it[2] = dcdy;
320    return scale;
321}
322
323void compute_iterators_t::iterators1616(GGLfixed* it,
324        GGLfixed c0, GGLfixed c1, GGLfixed c2) const
325{
326    const GGLfixed dc01 = c1 - c0;
327    const GGLfixed dc02 = c2 - c0;
328    // 16.16 x 16.16 == 32.32 --> 16.16
329    const int s = m_area_scale;
330    int32_t dcdx = gglMulAddx(dc01, m_dy02, gglMulx(dc02, m_dy10, s), s);
331    int32_t dcdy = gglMulAddx(dc02, m_dx01, gglMulx(dc01, m_dx20, s), s);
332    int32_t c = c0 - (gglMulAddx(dcdx, m_x0,
333            gglMulx(dcdy, m_y0, TRI_FRACTION_BITS), TRI_FRACTION_BITS));
334    it[0] = c;
335    it[1] = dcdx;
336    it[2] = dcdy;
337}
338
339#if defined(__arm__) && !defined(__thumb__)
340inline void compute_iterators_t::iterators0032(int32_t* it,
341        int32_t c0, int32_t c1, int32_t c2) const
342{
343    ::iterators0032(this, it, c0, c1, c2);
344}
345#else
346void compute_iterators_t::iterators0032(int32_t* it,
347        int32_t c0, int32_t c1, int32_t c2) const
348{
349    const int s = m_area_scale - 16;
350    int32_t dc01 = (c1 - c0)>>s;
351    int32_t dc02 = (c2 - c0)>>s;
352    // 16.16 x 16.16 == 32.32
353    int64_t dcdx = gglMulii(dc01, m_dy02) + gglMulii(dc02, m_dy10);
354    int64_t dcdy = gglMulii(dc02, m_dx01) + gglMulii(dc01, m_dx20);
355    int32_t c = (c0<<16) - ((dcdx*m_x0 + dcdy*m_y0)>>4);
356    it[ 0] = c;
357    it[ 1] = dcdx;
358    it[ 2] = dcdy;
359}
360#endif
361
362// ----------------------------------------------------------------------------
363
364static inline int32_t clampZ(GLfixed z) CONST;
365int32_t clampZ(GLfixed z) {
366    z = (z & ~(z>>31));
367    if (z >= 0x10000)
368        z = 0xFFFF;
369    return z;
370}
371
372static __attribute__((noinline))
373void fetch_texcoord_impl(ogles_context_t* c,
374        vertex_t* v0, vertex_t* v1, vertex_t* v2)
375{
376    vertex_t* const vtx[3] = { v0, v1, v2 };
377    array_t const * const texcoordArray = c->arrays.texture;
378
379    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
380        if (!(c->rasterizer.state.texture[i].enable))
381            continue;
382
383        for (int j=0 ; j<3 ; j++) {
384            vertex_t* const v = vtx[j];
385            if (v->flags & vertex_t::TT)
386                continue;
387
388            // NOTE: here we could compute automatic texgen
389            // such as sphere/cube maps, instead of fetching them
390            // from the textcoord array.
391
392            vec4_t& coords = v->texture[i];
393            const GLubyte* tp = texcoordArray[i].element(
394                    v->index & vertex_cache_t::INDEX_MASK);
395            texcoordArray[i].fetch(c, coords.v, tp);
396
397            // transform texture coordinates...
398            coords.Q = 0x10000;
399            const transform_t& tr = c->transforms.texture[i].transform;
400            if (ggl_unlikely(tr.ops)) {
401                c->arrays.tex_transform[i](&tr, &coords, &coords);
402            }
403
404            // divide by Q
405            const GGLfixed q = coords.Q;
406            if (ggl_unlikely(q != 0x10000)) {
407                const int32_t qinv = gglRecip28(q);
408                coords.S = gglMulx(coords.S, qinv, 28);
409                coords.T = gglMulx(coords.T, qinv, 28);
410            }
411        }
412    }
413    v0->flags |= vertex_t::TT;
414    v1->flags |= vertex_t::TT;
415    v2->flags |= vertex_t::TT;
416}
417
418inline void fetch_texcoord(ogles_context_t* c,
419        vertex_t* v0, vertex_t* v1, vertex_t* v2)
420{
421    const uint32_t enables = c->rasterizer.state.enables;
422    if (!(enables & GGL_ENABLE_TMUS))
423        return;
424
425    // Fetch & transform texture coordinates...
426    if (ggl_likely(v0->flags & v1->flags & v2->flags & vertex_t::TT)) {
427        // already done for all three vertices, bail...
428        return;
429    }
430    fetch_texcoord_impl(c, v0, v1, v2);
431}
432
433// ----------------------------------------------------------------------------
434#if 0
435#pragma mark -
436#pragma mark Point
437#endif
438
439void primitive_nop_point(ogles_context_t*, vertex_t*) {
440}
441
442void primitive_point(ogles_context_t* c, vertex_t* v)
443{
444    // lighting & clamping...
445    const uint32_t enables = c->rasterizer.state.enables;
446
447    if (ggl_unlikely(!(v->flags & vertex_t::LIT))) {
448        if (c->lighting.enable) {
449            c->lighting.lightVertex(c, v);
450        } else {
451            v->flags |= vertex_t::LIT;
452            const GLvoid* cp = c->arrays.color.element(
453                    v->index & vertex_cache_t::INDEX_MASK);
454            c->arrays.color.fetch(c, v->color.v, cp);
455        }
456        if (enables & GGL_ENABLE_FOG) {
457            v->fog = c->fog.fog(c, v->window.z);
458        }
459    }
460
461    // XXX: we don't need to do that each-time
462    // if color array and lighting not enabled
463    c->rasterizer.procs.color4xv(c, v->color.v);
464
465    // XXX: look into ES point-sprite extension
466    if (enables & GGL_ENABLE_TMUS) {
467        fetch_texcoord(c, v,v,v);
468        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
469            if (!c->rasterizer.state.texture[i].enable)
470                continue;
471            int32_t itt[8];
472            itt[1] = itt[2] = itt[4] = itt[5] = 0;
473            itt[6] = itt[7] = 16; // XXX: check that
474            if (c->rasterizer.state.texture[i].s_wrap == GGL_CLAMP) {
475                int width = c->textures.tmu[i].texture->surface.width;
476                itt[0] = v->texture[i].S * width;
477                itt[6] = 0;
478            }
479            if (c->rasterizer.state.texture[i].t_wrap == GGL_CLAMP) {
480                int height = c->textures.tmu[i].texture->surface.height;
481                itt[3] = v->texture[i].T * height;
482                itt[7] = 0;
483            }
484            c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
485        }
486    }
487
488    if (enables & GGL_ENABLE_DEPTH_TEST) {
489        int32_t itz[3];
490        itz[0] = clampZ(v->window.z) * 0x00010001;
491        itz[1] = itz[2] = 0;
492        c->rasterizer.procs.zGrad3xv(c, itz);
493    }
494
495    if (enables & GGL_ENABLE_FOG) {
496        GLfixed itf[3];
497        itf[0] = v->fog;
498        itf[1] = itf[2] = 0;
499        c->rasterizer.procs.fogGrad3xv(c, itf);
500    }
501
502    // Render our point...
503    c->rasterizer.procs.pointx(c, v->window.v, c->point.size);
504}
505
506// ----------------------------------------------------------------------------
507#if 0
508#pragma mark -
509#pragma mark Line
510#endif
511
512void primitive_nop_line(ogles_context_t*, vertex_t*, vertex_t*) {
513}
514
515void primitive_line(ogles_context_t* c, vertex_t* v0, vertex_t* v1)
516{
517    // This is a cheezy implementation of line drawing that
518    // uses 2 triangles per line.
519    // That said, how often line drawing is used?
520
521    // get texture coordinates
522    fetch_texcoord(c, v0, v1, v1);
523
524    // light/shade the vertices first (they're copied below)
525    c->lighting.lightTriangle(c, v0, v1, v1);
526
527    vertex_t v[4];
528    v[0] = *v0;
529    v[1] = *v1;
530    v0 = &v[0];
531    v1 = &v[1];
532
533    // clip the line if needed
534    if (ggl_unlikely((v0->flags | v1->flags) & vertex_t::CLIP_ALL)) {
535        unsigned int count = clip_line(c, v0, v1);
536        if (ggl_unlikely(count == 0))
537            return;
538    }
539
540    // compute iterators...
541    const uint32_t enables = c->rasterizer.state.enables;
542    const uint32_t mask =   GGL_ENABLE_TMUS |
543                            GGL_ENABLE_SMOOTH |
544                            GGL_ENABLE_W |
545                            GGL_ENABLE_FOG |
546                            GGL_ENABLE_DEPTH_TEST;
547
548    if (ggl_unlikely(enables & mask)) {
549        c->lerp.initTriangle(v0, v1, v1);
550        lerp_triangle(c, v0, v1, v1);
551    }
552
553    // render our line
554    c->rasterizer.procs.linex(c, v0->window.v, v1->window.v, c->line.width);
555}
556
557// ----------------------------------------------------------------------------
558#if 0
559#pragma mark -
560#pragma mark Triangle
561#endif
562
563void primitive_nop_triangle(ogles_context_t* c,
564        vertex_t* v0, vertex_t* v1, vertex_t* v2) {
565}
566
567void primitive_clip_triangle(ogles_context_t* c,
568        vertex_t* v0, vertex_t* v1, vertex_t* v2)
569{
570    uint32_t cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
571    if (ggl_likely(!cc)) {
572        // code below must be as optimized as possible, this is the
573        // common code path.
574
575        // This triangle is not clipped, test if it's culled
576        // unclipped triangle...
577        c->lerp.initTriangle(v0, v1, v2);
578        if (cull_triangle(c, v0, v1, v2))
579            return; // culled!
580
581        // Fetch all texture coordinates if needed
582        fetch_texcoord(c, v0, v1, v2);
583
584        // light (or shade) our triangle!
585        c->lighting.lightTriangle(c, v0, v1, v2);
586
587        triangle(c, v0, v1, v2);
588        return;
589    }
590
591    // The assumption here is that we're not going to clip very often,
592    // and even more rarely will we clip a triangle that ends up
593    // being culled out. So it's okay to light the vertices here, even though
594    // in a few cases we won't render the triangle (if culled).
595
596    // Fetch texture coordinates...
597    fetch_texcoord(c, v0, v1, v2);
598
599    // light (or shade) our triangle!
600    c->lighting.lightTriangle(c, v0, v1, v2);
601
602    clip_triangle(c, v0, v1, v2);
603}
604
605// -----------------------------------------------------------------------
606
607void triangle(ogles_context_t* c,
608        vertex_t* v0, vertex_t* v1, vertex_t* v2)
609{
610    // compute iterators...
611    const uint32_t enables = c->rasterizer.state.enables;
612    const uint32_t mask =   GGL_ENABLE_TMUS |
613                            GGL_ENABLE_SMOOTH |
614                            GGL_ENABLE_W |
615                            GGL_ENABLE_FOG |
616                            GGL_ENABLE_DEPTH_TEST;
617
618    if (ggl_likely(enables & mask))
619        lerp_triangle(c, v0, v1, v2);
620
621    c->rasterizer.procs.trianglex(c, v0->window.v, v1->window.v, v2->window.v);
622}
623
624void lerp_triangle(ogles_context_t* c,
625        vertex_t* v0, vertex_t* v1, vertex_t* v2)
626{
627    const uint32_t enables = c->rasterizer.state.enables;
628    c->lerp.initLerp(v0, enables);
629
630    // set up texture iterators
631    if (enables & GGL_ENABLE_TMUS) {
632        if (enables & GGL_ENABLE_W) {
633            lerp_texcoords_w(c, v0, v1, v2);
634        } else {
635            lerp_texcoords(c, v0, v1, v2);
636        }
637    }
638
639    // set up the color iterators
640    const compute_iterators_t& lerp = c->lerp;
641    if (enables & GGL_ENABLE_SMOOTH) {
642        GLfixed itc[12];
643        for (int i=0 ; i<4 ; i++) {
644            const GGLcolor c0 = v0->color.v[i] * 255;
645            const GGLcolor c1 = v1->color.v[i] * 255;
646            const GGLcolor c2 = v2->color.v[i] * 255;
647            lerp.iterators1616(&itc[i*3], c0, c1, c2);
648        }
649        c->rasterizer.procs.colorGrad12xv(c, itc);
650    }
651
652    if (enables & GGL_ENABLE_DEPTH_TEST) {
653        int32_t itz[3];
654        const int32_t v0z = clampZ(v0->window.z);
655        const int32_t v1z = clampZ(v1->window.z);
656        const int32_t v2z = clampZ(v2->window.z);
657        lerp.iterators0032(itz, v0z, v1z, v2z);
658        if (ggl_unlikely(c->polygonOffset.enable)) {
659            const GLfixed factor = c->polygonOffset.factor;
660            const GLfixed units = c->polygonOffset.units;
661            int32_t maxDepthSlope = max(abs(itz[1]), abs(itz[2]));
662            int32_t offset = (int64_t(maxDepthSlope)*factor +
663                    (int64_t(units) << 16)) >> 16;
664            itz[0] += offset; // XXX: this can cause overflows
665        }
666        c->rasterizer.procs.zGrad3xv(c, itz);
667    }
668
669    if (ggl_unlikely(enables & GGL_ENABLE_FOG)) {
670        GLfixed itf[3];
671        lerp.iterators1616(itf, v0->fog, v1->fog, v2->fog);
672        c->rasterizer.procs.fogGrad3xv(c, itf);
673    }
674}
675
676
677static inline
678int compute_lod(ogles_context_t* c, int i,
679        int32_t s0, int32_t t0, int32_t s1, int32_t t1, int32_t s2, int32_t t2)
680{
681    // Compute mipmap level / primitive
682    // rho = sqrt( texelArea / area )
683    // lod = log2( rho )
684    // lod = log2( texelArea / area ) / 2
685    // lod = (log2( texelArea ) - log2( area )) / 2
686    const compute_iterators_t& lerp = c->lerp;
687    const GGLcoord area = abs(lerp.area());
688    const int w = c->textures.tmu[i].texture->surface.width;
689    const int h = c->textures.tmu[i].texture->surface.height;
690    const int shift = 16 + (16 - TRI_FRACTION_BITS);
691    int32_t texelArea = abs( gglMulx(s1-s0, t2-t0, shift) -
692            gglMulx(s2-s0, t1-t0, shift) )*w*h;
693    int log2TArea = (32-TRI_FRACTION_BITS  -1) - gglClz(texelArea);
694    int log2Area  = (32-TRI_FRACTION_BITS*2-1) - gglClz(area);
695    int lod = (log2TArea - log2Area + 1) >> 1;
696    return lod;
697}
698
699void lerp_texcoords(ogles_context_t* c,
700        vertex_t* v0, vertex_t* v1, vertex_t* v2)
701{
702    const compute_iterators_t& lerp = c->lerp;
703    int32_t itt[8] __attribute__((aligned(16)));
704    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
705        const texture_t& tmu = c->rasterizer.state.texture[i];
706        if (!tmu.enable)
707            continue;
708
709        // compute the jacobians using block floating-point
710        int32_t s0 = v0->texture[i].S;
711        int32_t t0 = v0->texture[i].T;
712        int32_t s1 = v1->texture[i].S;
713        int32_t t1 = v1->texture[i].T;
714        int32_t s2 = v2->texture[i].S;
715        int32_t t2 = v2->texture[i].T;
716
717        const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
718        if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
719            int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
720            c->rasterizer.procs.bindTextureLod(c, i,
721                    &c->textures.tmu[i].texture->mip(lod));
722        }
723
724        // premultiply (s,t) when clampling
725        if (tmu.s_wrap == GGL_CLAMP) {
726            const int width = tmu.surface.width;
727            s0 *= width;
728            s1 *= width;
729            s2 *= width;
730        }
731        if (tmu.t_wrap == GGL_CLAMP) {
732            const int height = tmu.surface.height;
733            t0 *= height;
734            t1 *= height;
735            t2 *= height;
736        }
737        itt[6] = -lerp.iteratorsScale(itt+0, s0, s1, s2);
738        itt[7] = -lerp.iteratorsScale(itt+3, t0, t1, t2);
739        c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
740    }
741}
742
743void lerp_texcoords_w(ogles_context_t* c,
744        vertex_t* v0, vertex_t* v1, vertex_t* v2)
745{
746    const compute_iterators_t& lerp = c->lerp;
747    int32_t itt[8] __attribute__((aligned(16)));
748    int32_t itw[3];
749
750    // compute W's scale to 2.30
751    int32_t w0 = v0->window.w;
752    int32_t w1 = v1->window.w;
753    int32_t w2 = v2->window.w;
754    int wscale = 32 - gglClz(w0|w1|w2);
755
756    // compute the jacobian using block floating-point
757    int sc = lerp.iteratorsScale(itw, w0, w1, w2);
758    sc +=  wscale - 16;
759    c->rasterizer.procs.wGrad3xv(c, itw);
760
761    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
762        const texture_t& tmu = c->rasterizer.state.texture[i];
763        if (!tmu.enable)
764            continue;
765
766        // compute the jacobians using block floating-point
767        int32_t s0 = v0->texture[i].S;
768        int32_t t0 = v0->texture[i].T;
769        int32_t s1 = v1->texture[i].S;
770        int32_t t1 = v1->texture[i].T;
771        int32_t s2 = v2->texture[i].S;
772        int32_t t2 = v2->texture[i].T;
773
774        const GLenum min_filter = c->textures.tmu[i].texture->min_filter;
775        if (ggl_unlikely(min_filter >= GL_NEAREST_MIPMAP_NEAREST)) {
776            int lod = compute_lod(c, i, s0, t0, s1, t1, s2, t2);
777            c->rasterizer.procs.bindTextureLod(c, i,
778                    &c->textures.tmu[i].texture->mip(lod));
779        }
780
781        // premultiply (s,t) when clampling
782        if (tmu.s_wrap == GGL_CLAMP) {
783            const int width = tmu.surface.width;
784            s0 *= width;
785            s1 *= width;
786            s2 *= width;
787        }
788        if (tmu.t_wrap == GGL_CLAMP) {
789            const int height = tmu.surface.height;
790            t0 *= height;
791            t1 *= height;
792            t2 *= height;
793        }
794
795        s0 = gglMulx(s0, w0, wscale);
796        t0 = gglMulx(t0, w0, wscale);
797        s1 = gglMulx(s1, w1, wscale);
798        t1 = gglMulx(t1, w1, wscale);
799        s2 = gglMulx(s2, w2, wscale);
800        t2 = gglMulx(t2, w2, wscale);
801
802        itt[6] = sc - lerp.iteratorsScale(itt+0, s0, s1, s2);
803        itt[7] = sc - lerp.iteratorsScale(itt+3, t0, t1, t2);
804        c->rasterizer.procs.texCoordGradScale8xv(c, i, itt);
805    }
806}
807
808
809static inline
810bool cull_triangle(ogles_context_t* c, vertex_t* v0, vertex_t* v1, vertex_t* v2)
811{
812    if (ggl_likely(c->cull.enable)) {
813        const GLenum winding = (c->lerp.area() > 0) ? GL_CW : GL_CCW;
814        const GLenum face = (winding == c->cull.frontFace) ? GL_FRONT : GL_BACK;
815        if (face == c->cull.cullFace)
816            return true; // culled!
817    }
818    return false;
819}
820
821static inline
822GLfixed frustumPlaneDist(int plane, const vec4_t& s)
823{
824    const GLfixed d = s.v[ plane >> 1 ];
825    return  ((plane & 1) ? (s.w - d) : (s.w + d));
826}
827
828static inline
829int32_t clipDivide(GLfixed a, GLfixed b) {
830    // returns a 4.28 fixed-point
831    return gglMulDivi(1LU<<28, a, b);
832}
833
834void clip_triangle(ogles_context_t* c,
835        vertex_t* v0, vertex_t* v1, vertex_t* v2)
836{
837    uint32_t all_cc = (v0->flags | v1->flags | v2->flags) & vertex_t::CLIP_ALL;
838
839    vertex_t *p0, *p1, *p2;
840    const int MAX_CLIPPING_PLANES = 6 + OGLES_MAX_CLIP_PLANES;
841    const int MAX_VERTICES = 3;
842
843    // Temporary buffer to hold the new vertices. Each plane can add up to
844    // two new vertices (because the polygon is convex).
845    // We need one extra element, to handle an overflow case when
846    // the polygon degenerates into something non convex.
847    vertex_t buffer[MAX_CLIPPING_PLANES * 2 + 1];   // ~3KB
848    vertex_t* buf = buffer;
849
850    // original list of vertices (polygon to clip, in fact this
851    // function works with an arbitrary polygon).
852    vertex_t* in[3] = { v0, v1, v2 };
853
854    // output lists (we need 2, which we use back and forth)
855    // (maximum outpout list's size is MAX_CLIPPING_PLANES + MAX_VERTICES)
856    // 2 more elements for overflow when non convex polygons.
857    vertex_t* out[2][MAX_CLIPPING_PLANES + MAX_VERTICES + 2];
858    unsigned int outi = 0;
859
860    // current input list
861    vertex_t** ivl = in;
862
863    // 3 input vertices, 0 in the output list, first plane
864    unsigned int ic = 3;
865
866    // User clip-planes first, the clipping is always done in eye-coordinate
867    // this is basically the same algorithm than for the view-volume
868    // clipping, except for the computation of the distance (vertex, plane)
869    // and the fact that we need to compute the eye-coordinates of each
870    // new vertex we create.
871
872    if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
873    {
874        unsigned int plane = 0;
875        uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
876        do {
877            if (cc & 1) {
878                // pointers to our output list (head and current)
879                vertex_t** const ovl = &out[outi][0];
880                vertex_t** output = ovl;
881                unsigned int oc = 0;
882                unsigned int sentinel = 0;
883                // previous vertice, compute distance to the plane
884                vertex_t* s = ivl[ic-1];
885                const vec4_t& equation = c->clipPlanes.plane[plane].equation;
886                GLfixed sd = dot4(equation.v, s->eye.v);
887                // clip each vertice against this plane...
888                for (unsigned int i=0 ; i<ic ; i++) {
889                    vertex_t* p = ivl[i];
890                    const GLfixed pd = dot4(equation.v, p->eye.v);
891                    if (sd >= 0) {
892                        if (pd >= 0) {
893                            // both inside
894                            *output++ = p;
895                            oc++;
896                        } else {
897                            // s inside, p outside (exiting)
898                            const GLfixed t = clipDivide(sd, sd-pd);
899                            c->arrays.clipEye(c, buf, t, p, s);
900                            *output++ = buf++;
901                            oc++;
902                            if (++sentinel >= 3)
903                                return; // non-convex polygon!
904                        }
905                    } else {
906                        if (pd >= 0) {
907                            // s outside (entering)
908                            if (pd) {
909                                const GLfixed t = clipDivide(pd, pd-sd);
910                                c->arrays.clipEye(c, buf, t, s, p);
911                                *output++ = buf++;
912                                oc++;
913                                if (++sentinel >= 3)
914                                    return; // non-convex polygon!
915                            }
916                            *output++ = p;
917                            oc++;
918                        } else {
919                           // both outside
920                        }
921                    }
922                    s = p;
923                    sd = pd;
924                }
925                // output list become the new input list
926                if (oc<3)
927                    return; // less than 3 vertices left? we're done!
928                ivl = ovl;
929                ic = oc;
930                outi = 1-outi;
931            }
932            cc >>= 1;
933            plane++;
934        } while (cc);
935    }
936
937    // frustum clip-planes
938    if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
939    {
940        unsigned int plane = 0;
941        uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
942        do {
943            if (cc & 1) {
944                // pointers to our output list (head and current)
945                vertex_t** const ovl = &out[outi][0];
946                vertex_t** output = ovl;
947                unsigned int oc = 0;
948                unsigned int sentinel = 0;
949                // previous vertice, compute distance to the plane
950                vertex_t* s = ivl[ic-1];
951                GLfixed sd = frustumPlaneDist(plane, s->clip);
952                // clip each vertice against this plane...
953                for (unsigned int i=0 ; i<ic ; i++) {
954                    vertex_t* p = ivl[i];
955                    const GLfixed pd = frustumPlaneDist(plane, p->clip);
956                    if (sd >= 0) {
957                        if (pd >= 0) {
958                            // both inside
959                            *output++ = p;
960                            oc++;
961                        } else {
962                            // s inside, p outside (exiting)
963                            const GLfixed t = clipDivide(sd, sd-pd);
964                            c->arrays.clipVertex(c, buf, t, p, s);
965                            *output++ = buf++;
966                            oc++;
967                            if (++sentinel >= 3)
968                                return; // non-convex polygon!
969                        }
970                    } else {
971                        if (pd >= 0) {
972                            // s outside (entering)
973                            if (pd) {
974                                const GLfixed t = clipDivide(pd, pd-sd);
975                                c->arrays.clipVertex(c, buf, t, s, p);
976                                *output++ = buf++;
977                                oc++;
978                                if (++sentinel >= 3)
979                                    return; // non-convex polygon!
980                            }
981                            *output++ = p;
982                            oc++;
983                        } else {
984                           // both outside
985                        }
986                    }
987                    s = p;
988                    sd = pd;
989                }
990                // output list become the new input list
991                if (oc<3)
992                    return; // less than 3 vertices left? we're done!
993                ivl = ovl;
994                ic = oc;
995                outi = 1-outi;
996            }
997            cc >>= 1;
998            plane++;
999        } while (cc);
1000    }
1001
1002    // finally we can render our triangles...
1003    p0 = ivl[0];
1004    p1 = ivl[1];
1005    for (unsigned int i=2 ; i<ic ; i++) {
1006        p2 = ivl[i];
1007        c->lerp.initTriangle(p0, p1, p2);
1008        if (cull_triangle(c, p0, p1, p2)) {
1009            p1 = p2;
1010            continue; // culled!
1011        }
1012        triangle(c, p0, p1, p2);
1013        p1 = p2;
1014    }
1015}
1016
1017unsigned int clip_line(ogles_context_t* c, vertex_t* s, vertex_t* p)
1018{
1019    const uint32_t all_cc = (s->flags | p->flags) & vertex_t::CLIP_ALL;
1020
1021    if (ggl_unlikely(all_cc & vertex_t::USER_CLIP_ALL))
1022    {
1023        unsigned int plane = 0;
1024        uint32_t cc = (all_cc & vertex_t::USER_CLIP_ALL) >> 8;
1025        do {
1026            if (cc & 1) {
1027                const vec4_t& equation = c->clipPlanes.plane[plane].equation;
1028                const GLfixed sd = dot4(equation.v, s->eye.v);
1029                const GLfixed pd = dot4(equation.v, p->eye.v);
1030                if (sd >= 0) {
1031                    if (pd >= 0) {
1032                        // both inside
1033                    } else {
1034                        // s inside, p outside (exiting)
1035                        const GLfixed t = clipDivide(sd, sd-pd);
1036                        c->arrays.clipEye(c, p, t, p, s);
1037                    }
1038                } else {
1039                    if (pd >= 0) {
1040                        // s outside (entering)
1041                        if (pd) {
1042                            const GLfixed t = clipDivide(pd, pd-sd);
1043                            c->arrays.clipEye(c, s, t, s, p);
1044                        }
1045                    } else {
1046                       // both outside
1047                       return 0;
1048                    }
1049                }
1050            }
1051            cc >>= 1;
1052            plane++;
1053        } while (cc);
1054    }
1055
1056    // frustum clip-planes
1057    if (all_cc & vertex_t::FRUSTUM_CLIP_ALL)
1058    {
1059        unsigned int plane = 0;
1060        uint32_t cc = all_cc & vertex_t::FRUSTUM_CLIP_ALL;
1061        do {
1062            if (cc & 1) {
1063                const GLfixed sd = frustumPlaneDist(plane, s->clip);
1064                const GLfixed pd = frustumPlaneDist(plane, p->clip);
1065                if (sd >= 0) {
1066                    if (pd >= 0) {
1067                        // both inside
1068                    } else {
1069                        // s inside, p outside (exiting)
1070                        const GLfixed t = clipDivide(sd, sd-pd);
1071                        c->arrays.clipVertex(c, p, t, p, s);
1072                    }
1073                } else {
1074                    if (pd >= 0) {
1075                        // s outside (entering)
1076                        if (pd) {
1077                            const GLfixed t = clipDivide(pd, pd-sd);
1078                            c->arrays.clipVertex(c, s, t, s, p);
1079                        }
1080                    } else {
1081                       // both outside
1082                       return 0;
1083                    }
1084                }
1085            }
1086            cc >>= 1;
1087            plane++;
1088        } while (cc);
1089    }
1090
1091    return 2;
1092}
1093
1094
1095}; // namespace android
1096