1/* libs/pixelflinger/trap.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <assert.h>
19#include <stdio.h>
20#include <stdlib.h>
21
22#include "trap.h"
23#include "picker.h"
24
25#include <cutils/log.h>
26#include <cutils/memory.h>
27
28namespace android {
29
30// ----------------------------------------------------------------------------
31
32// enable to see triangles edges
33#define DEBUG_TRANGLES  0
34
35// ----------------------------------------------------------------------------
36
37static void pointx_validate(void *con, const GGLcoord* c, GGLcoord r);
38static void pointx(void *con, const GGLcoord* c, GGLcoord r);
39static void aa_pointx(void *con, const GGLcoord* c, GGLcoord r);
40static void aa_nice_pointx(void *con, const GGLcoord* c, GGLcoord r);
41
42static void linex_validate(void *con, const GGLcoord* v0, const GGLcoord* v1, GGLcoord w);
43static void linex(void *con, const GGLcoord* v0, const GGLcoord* v1, GGLcoord w);
44static void aa_linex(void *con, const GGLcoord* v0, const GGLcoord* v1, GGLcoord w);
45
46static void recti_validate(void* c, GGLint l, GGLint t, GGLint r, GGLint b);
47static void recti(void* c, GGLint l, GGLint t, GGLint r, GGLint b);
48
49static void trianglex_validate(void*,
50        const GGLcoord*, const GGLcoord*, const GGLcoord*);
51static void trianglex_small(void*,
52        const GGLcoord*, const GGLcoord*, const GGLcoord*);
53static void trianglex_big(void*,
54        const GGLcoord*, const GGLcoord*, const GGLcoord*);
55static void aa_trianglex(void*,
56        const GGLcoord*, const GGLcoord*, const GGLcoord*);
57static void trianglex_debug(void* con,
58        const GGLcoord*, const GGLcoord*, const GGLcoord*);
59
60static void aapolyx(void* con,
61        const GGLcoord* pts, int count);
62
63static inline int min(int a, int b) CONST;
64static inline int max(int a, int b) CONST;
65static inline int min(int a, int b, int c) CONST;
66static inline int max(int a, int b, int c) CONST;
67
68// ----------------------------------------------------------------------------
69#if 0
70#pragma mark -
71#pragma mark Tools
72#endif
73
74inline int min(int a, int b) {
75    return a<b ? a : b;
76}
77inline int max(int a, int b) {
78    return a<b ? b : a;
79}
80inline int min(int a, int b, int c) {
81    return min(a,min(b,c));
82}
83inline int max(int a, int b, int c) {
84    return max(a,max(b,c));
85}
86
87template <typename T>
88static inline void swap(T& a, T& b) {
89    T t(a);
90    a = b;
91    b = t;
92}
93
94static void
95triangle_dump_points( const GGLcoord*  v0,
96                      const GGLcoord*  v1,
97				 	  const GGLcoord*  v2 )
98{
99    float tri = 1.0f / TRI_ONE;
100  LOGD(     "  P0=(%.3f, %.3f)  [%08x, %08x]\n"
101            "  P1=(%.3f, %.3f)  [%08x, %08x]\n"
102            "  P2=(%.3f, %.3f)  [%08x, %08x]\n",
103		v0[0]*tri, v0[1]*tri, v0[0], v0[1],
104		v1[0]*tri, v1[1]*tri, v1[0], v1[1],
105		v2[0]*tri, v2[1]*tri, v2[0], v2[1] );
106}
107
108// ----------------------------------------------------------------------------
109#if 0
110#pragma mark -
111#pragma mark Misc
112#endif
113
114void ggl_init_trap(context_t* c)
115{
116    ggl_state_changed(c, GGL_PIXEL_PIPELINE_STATE|GGL_TMU_STATE|GGL_CB_STATE);
117}
118
119void ggl_state_changed(context_t* c, int flags)
120{
121    if (ggl_likely(!c->dirty)) {
122        c->procs.pointx     = pointx_validate;
123        c->procs.linex      = linex_validate;
124        c->procs.recti      = recti_validate;
125        c->procs.trianglex  = trianglex_validate;
126    }
127    c->dirty |= uint32_t(flags);
128}
129
130// ----------------------------------------------------------------------------
131#if 0
132#pragma mark -
133#pragma mark Point
134#endif
135
136void pointx_validate(void *con, const GGLcoord* v, GGLcoord rad)
137{
138    GGL_CONTEXT(c, con);
139    ggl_pick(c);
140    if (c->state.needs.p & GGL_NEED_MASK(P_AA)) {
141        if (c->state.enables & GGL_ENABLE_POINT_AA_NICE) {
142            c->procs.pointx = aa_nice_pointx;
143        } else {
144            c->procs.pointx = aa_pointx;
145        }
146    } else {
147        c->procs.pointx = pointx;
148    }
149    c->procs.pointx(con, v, rad);
150}
151
152void pointx(void *con, const GGLcoord* v, GGLcoord rad)
153{
154    GGL_CONTEXT(c, con);
155    GGLcoord halfSize = TRI_ROUND(rad) >> 1;
156    if (halfSize == 0)
157        halfSize = TRI_HALF;
158    GGLcoord xc = v[0];
159    GGLcoord yc = v[1];
160    if (halfSize & TRI_HALF) { // size odd
161        xc = TRI_FLOOR(xc) + TRI_HALF;
162        yc = TRI_FLOOR(yc) + TRI_HALF;
163    } else { // size even
164        xc = TRI_ROUND(xc);
165        yc = TRI_ROUND(yc);
166    }
167    GGLint l = (xc - halfSize) >> TRI_FRACTION_BITS;
168    GGLint t = (yc - halfSize) >> TRI_FRACTION_BITS;
169    GGLint r = (xc + halfSize) >> TRI_FRACTION_BITS;
170    GGLint b = (yc + halfSize) >> TRI_FRACTION_BITS;
171    recti(c, l, t, r, b);
172}
173
174// This way of computing the coverage factor, is more accurate and gives
175// better results for small circles, but it is also a lot slower.
176// Here we use super-sampling.
177static int32_t coverageNice(GGLcoord x, GGLcoord y,
178        GGLcoord rmin, GGLcoord rmax, GGLcoord rr)
179{
180    const GGLcoord d2 = x*x + y*y;
181    if (d2 >= rmax) return 0;
182    if (d2 < rmin)  return 0x7FFF;
183
184    const int kSamples              =  4;
185    const int kInc                  =  4;    // 1/4 = 0.25
186    const int kCoverageUnit         =  1;    // 1/(4^2) = 0.0625
187    const GGLcoord kCoordOffset     = -6;    // -0.375
188
189    int hits = 0;
190    int x_sample = x + kCoordOffset;
191    for (int i=0 ; i<kSamples ; i++, x_sample += kInc) {
192        const int xval = rr - (x_sample * x_sample);
193        int y_sample = y + kCoordOffset;
194        for (int j=0 ; j<kSamples ; j++, y_sample += kInc) {
195            if (xval - (y_sample * y_sample) > 0)
196                hits += kCoverageUnit;
197        }
198    }
199    return min(0x7FFF, hits << (15 - kSamples));
200}
201
202
203void aa_nice_pointx(void *con, const GGLcoord* v, GGLcoord size)
204{
205    GGL_CONTEXT(c, con);
206
207    GGLcoord rad = ((size + 1)>>1);
208    GGLint l = (v[0] - rad) >> TRI_FRACTION_BITS;
209    GGLint t = (v[1] - rad) >> TRI_FRACTION_BITS;
210    GGLint r = (v[0] + rad + (TRI_ONE-1)) >> TRI_FRACTION_BITS;
211    GGLint b = (v[1] + rad + (TRI_ONE-1)) >> TRI_FRACTION_BITS;
212    GGLcoord xstart = TRI_FROM_INT(l) - v[0] + TRI_HALF;
213    GGLcoord ystart = TRI_FROM_INT(t) - v[1] + TRI_HALF;
214
215    // scissor...
216    if (l < GGLint(c->state.scissor.left)) {
217        xstart += TRI_FROM_INT(c->state.scissor.left-l);
218        l = GGLint(c->state.scissor.left);
219    }
220    if (t < GGLint(c->state.scissor.top)) {
221        ystart += TRI_FROM_INT(c->state.scissor.top-t);
222        t = GGLint(c->state.scissor.top);
223    }
224    if (r > GGLint(c->state.scissor.right)) {
225        r = GGLint(c->state.scissor.right);
226    }
227    if (b > GGLint(c->state.scissor.bottom)) {
228        b = GGLint(c->state.scissor.bottom);
229    }
230
231    int xc = r - l;
232    int yc = b - t;
233    if (xc>0 && yc>0) {
234        int16_t* covPtr = c->state.buffers.coverage;
235        const int32_t sqr2Over2 = 0xC; // rounded up
236        GGLcoord rr = rad*rad;
237        GGLcoord rmin = (rad - sqr2Over2)*(rad - sqr2Over2);
238        GGLcoord rmax = (rad + sqr2Over2)*(rad + sqr2Over2);
239        GGLcoord y = ystart;
240        c->iterators.xl = l;
241        c->iterators.xr = r;
242        c->init_y(c, t);
243        do {
244            // compute coverage factors for each pixel
245            GGLcoord x = xstart;
246            for (int i=l ; i<r ; i++) {
247                covPtr[i] = coverageNice(x, y, rmin, rmax, rr);
248                x += TRI_ONE;
249            }
250            y += TRI_ONE;
251            c->scanline(c);
252            c->step_y(c);
253        } while (--yc);
254    }
255}
256
257// This is a cheap way of computing the coverage factor for a circle.
258// We just lerp between the circles of radii r-sqrt(2)/2 and r+sqrt(2)/2
259static inline int32_t coverageFast(GGLcoord x, GGLcoord y,
260        GGLcoord rmin, GGLcoord rmax, GGLcoord scale)
261{
262    const GGLcoord d2 = x*x + y*y;
263    if (d2 >= rmax) return 0;
264    if (d2 < rmin)  return 0x7FFF;
265    return 0x7FFF - (d2-rmin)*scale;
266}
267
268void aa_pointx(void *con, const GGLcoord* v, GGLcoord size)
269{
270    GGL_CONTEXT(c, con);
271
272    GGLcoord rad = ((size + 1)>>1);
273    GGLint l = (v[0] - rad) >> TRI_FRACTION_BITS;
274    GGLint t = (v[1] - rad) >> TRI_FRACTION_BITS;
275    GGLint r = (v[0] + rad + (TRI_ONE-1)) >> TRI_FRACTION_BITS;
276    GGLint b = (v[1] + rad + (TRI_ONE-1)) >> TRI_FRACTION_BITS;
277    GGLcoord xstart = TRI_FROM_INT(l) - v[0] + TRI_HALF;
278    GGLcoord ystart = TRI_FROM_INT(t) - v[1] + TRI_HALF;
279
280    // scissor...
281    if (l < GGLint(c->state.scissor.left)) {
282        xstart += TRI_FROM_INT(c->state.scissor.left-l);
283        l = GGLint(c->state.scissor.left);
284    }
285    if (t < GGLint(c->state.scissor.top)) {
286        ystart += TRI_FROM_INT(c->state.scissor.top-t);
287        t = GGLint(c->state.scissor.top);
288    }
289    if (r > GGLint(c->state.scissor.right)) {
290        r = GGLint(c->state.scissor.right);
291    }
292    if (b > GGLint(c->state.scissor.bottom)) {
293        b = GGLint(c->state.scissor.bottom);
294    }
295
296    int xc = r - l;
297    int yc = b - t;
298    if (xc>0 && yc>0) {
299        int16_t* covPtr = c->state.buffers.coverage;
300        rad <<= 4;
301        const int32_t sqr2Over2 = 0xB5;    // fixed-point 24.8
302        GGLcoord rmin = rad - sqr2Over2;
303        GGLcoord rmax = rad + sqr2Over2;
304        GGLcoord scale;
305        rmin *= rmin;
306        rmax *= rmax;
307        scale = 0x800000 / (rmax - rmin);
308        rmin >>= 8;
309        rmax >>= 8;
310
311        GGLcoord y = ystart;
312        c->iterators.xl = l;
313        c->iterators.xr = r;
314        c->init_y(c, t);
315
316        do {
317            // compute coverage factors for each pixel
318            GGLcoord x = xstart;
319            for (int i=l ; i<r ; i++) {
320                covPtr[i] = coverageFast(x, y, rmin, rmax, scale);
321                x += TRI_ONE;
322            }
323            y += TRI_ONE;
324            c->scanline(c);
325            c->step_y(c);
326        } while (--yc);
327    }
328}
329
330// ----------------------------------------------------------------------------
331#if 0
332#pragma mark -
333#pragma mark Line
334#endif
335
336void linex_validate(void *con, const GGLcoord* v0, const GGLcoord* v1, GGLcoord w)
337{
338    GGL_CONTEXT(c, con);
339    ggl_pick(c);
340    if (c->state.needs.p & GGL_NEED_MASK(P_AA)) {
341        c->procs.linex = aa_linex;
342    } else {
343        c->procs.linex = linex;
344    }
345    c->procs.linex(con, v0, v1, w);
346}
347
348static void linex(void *con, const GGLcoord* v0, const GGLcoord* v1, GGLcoord width)
349{
350    GGL_CONTEXT(c, con);
351    GGLcoord v[4][2];
352    v[0][0] = v0[0];    v[0][1] = v0[1];
353    v[1][0] = v1[0];    v[1][1] = v1[1];
354    v0 = v[0];
355    v1 = v[1];
356    const GGLcoord dx = abs(v0[0] - v1[0]);
357    const GGLcoord dy = abs(v0[1] - v1[1]);
358    GGLcoord nx, ny;
359    nx = ny = 0;
360
361    GGLcoord halfWidth = TRI_ROUND(width) >> 1;
362    if (halfWidth == 0)
363        halfWidth = TRI_HALF;
364
365    ((dx > dy) ? ny : nx) = halfWidth;
366    v[2][0] = v1[0];    v[2][1] = v1[1];
367    v[3][0] = v0[0];    v[3][1] = v0[1];
368    v[0][0] += nx;      v[0][1] += ny;
369    v[1][0] += nx;      v[1][1] += ny;
370    v[2][0] -= nx;      v[2][1] -= ny;
371    v[3][0] -= nx;      v[3][1] -= ny;
372    trianglex_big(con, v[0], v[1], v[2]);
373    trianglex_big(con, v[0], v[2], v[3]);
374}
375
376static void aa_linex(void *con, const GGLcoord* v0, const GGLcoord* v1, GGLcoord width)
377{
378    GGL_CONTEXT(c, con);
379    GGLcoord v[4][2];
380    v[0][0] = v0[0];    v[0][1] = v0[1];
381    v[1][0] = v1[0];    v[1][1] = v1[1];
382    v0 = v[0];
383    v1 = v[1];
384
385    const GGLcoord dx = v0[0] - v1[0];
386    const GGLcoord dy = v0[1] - v1[1];
387    GGLcoord nx = -dy;
388    GGLcoord ny =  dx;
389
390    // generally, this will be well below 1.0
391    const GGLfixed norm = gglMulx(width, gglSqrtRecipx(nx*nx+ny*ny), 4);
392    nx = gglMulx(nx, norm, 21);
393    ny = gglMulx(ny, norm, 21);
394
395    v[2][0] = v1[0];    v[2][1] = v1[1];
396    v[3][0] = v0[0];    v[3][1] = v0[1];
397    v[0][0] += nx;      v[0][1] += ny;
398    v[1][0] += nx;      v[1][1] += ny;
399    v[2][0] -= nx;      v[2][1] -= ny;
400    v[3][0] -= nx;      v[3][1] -= ny;
401    aapolyx(con, v[0], 4);
402}
403
404
405// ----------------------------------------------------------------------------
406#if 0
407#pragma mark -
408#pragma mark Rect
409#endif
410
411void recti_validate(void *con, GGLint l, GGLint t, GGLint r, GGLint b)
412{
413    GGL_CONTEXT(c, con);
414    ggl_pick(c);
415    c->procs.recti = recti;
416    c->procs.recti(con, l, t, r, b);
417}
418
419void recti(void* con, GGLint l, GGLint t, GGLint r, GGLint b)
420{
421    GGL_CONTEXT(c, con);
422
423    // scissor...
424    if (l < GGLint(c->state.scissor.left))
425        l = GGLint(c->state.scissor.left);
426    if (t < GGLint(c->state.scissor.top))
427        t = GGLint(c->state.scissor.top);
428    if (r > GGLint(c->state.scissor.right))
429        r = GGLint(c->state.scissor.right);
430    if (b > GGLint(c->state.scissor.bottom))
431        b = GGLint(c->state.scissor.bottom);
432
433    int xc = r - l;
434    int yc = b - t;
435    if (xc>0 && yc>0) {
436        c->iterators.xl = l;
437        c->iterators.xr = r;
438        c->init_y(c, t);
439        c->rect(c, yc);
440    }
441}
442
443// ----------------------------------------------------------------------------
444#if 0
445#pragma mark -
446#pragma mark Triangle / Debugging
447#endif
448
449static void scanline_set(context_t* c)
450{
451    int32_t x = c->iterators.xl;
452    size_t ct = c->iterators.xr - x;
453    int32_t y = c->iterators.y;
454    surface_t* cb = &(c->state.buffers.color);
455    const GGLFormat* fp = &(c->formats[cb->format]);
456    uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
457                            (x + (cb->stride * y)) * fp->size;
458    const size_t size = ct * fp->size;
459    memset(dst, 0xFF, size);
460}
461
462static void trianglex_debug(void* con,
463        const GGLcoord* v0, const GGLcoord* v1, const GGLcoord* v2)
464{
465    GGL_CONTEXT(c, con);
466    if (c->state.needs.p & GGL_NEED_MASK(P_AA)) {
467        aa_trianglex(con,v0,v1,v2);
468    } else {
469        trianglex_big(con,v0,v1,v2);
470    }
471	void (*save_scanline)(context_t*)  = c->scanline;
472    c->scanline = scanline_set;
473    linex(con, v0, v1, TRI_ONE);
474    linex(con, v1, v2, TRI_ONE);
475    linex(con, v2, v0, TRI_ONE);
476    c->scanline = save_scanline;
477}
478
479static void trianglex_xor(void* con,
480        const GGLcoord* v0, const GGLcoord* v1, const GGLcoord* v2)
481{
482    trianglex_big(con,v0,v1,v2);
483    trianglex_small(con,v0,v1,v2);
484}
485
486// ----------------------------------------------------------------------------
487#if 0
488#pragma mark -
489#pragma mark Triangle
490#endif
491
492void trianglex_validate(void *con,
493        const GGLcoord* v0, const GGLcoord* v1, const GGLcoord* v2)
494{
495    GGL_CONTEXT(c, con);
496    ggl_pick(c);
497    if (c->state.needs.p & GGL_NEED_MASK(P_AA)) {
498        c->procs.trianglex = DEBUG_TRANGLES ? trianglex_debug : aa_trianglex;
499    } else {
500        c->procs.trianglex = DEBUG_TRANGLES ? trianglex_debug : trianglex_big;
501    }
502    c->procs.trianglex(con, v0, v1, v2);
503}
504
505// ----------------------------------------------------------------------------
506
507void trianglex_small(void* con,
508        const GGLcoord* v0, const GGLcoord* v1, const GGLcoord* v2)
509{
510    GGL_CONTEXT(c, con);
511
512    // vertices are in 28.4 fixed point, which allows
513    // us to use 32 bits multiplies below.
514    int32_t x0 = v0[0];
515    int32_t y0 = v0[1];
516    int32_t x1 = v1[0];
517    int32_t y1 = v1[1];
518    int32_t x2 = v2[0];
519    int32_t y2 = v2[1];
520
521    int32_t dx01 = x0 - x1;
522    int32_t dy20 = y2 - y0;
523    int32_t dy01 = y0 - y1;
524    int32_t dx20 = x2 - x0;
525
526    // The code below works only with CCW triangles
527    // so if we get a CW triangle, we need to swap two of its vertices
528    if (dx01*dy20 < dy01*dx20) {
529        swap(x0, x1);
530        swap(y0, y1);
531        dx01 = x0 - x1;
532        dy01 = y0 - y1;
533        dx20 = x2 - x0;
534        dy20 = y2 - y0;
535    }
536    int32_t dx12 = x1 - x2;
537    int32_t dy12 = y1 - y2;
538
539    // bounding box & scissor
540    const int32_t bminx = TRI_FLOOR(min(x0, x1, x2)) >> TRI_FRACTION_BITS;
541    const int32_t bminy = TRI_FLOOR(min(y0, y1, y2)) >> TRI_FRACTION_BITS;
542    const int32_t bmaxx = TRI_CEIL( max(x0, x1, x2)) >> TRI_FRACTION_BITS;
543    const int32_t bmaxy = TRI_CEIL( max(y0, y1, y2)) >> TRI_FRACTION_BITS;
544    const int32_t minx = max(bminx, c->state.scissor.left);
545    const int32_t miny = max(bminy, c->state.scissor.top);
546    const int32_t maxx = min(bmaxx, c->state.scissor.right);
547    const int32_t maxy = min(bmaxy, c->state.scissor.bottom);
548    if ((minx >= maxx) || (miny >= maxy))
549        return; // too small or clipped out...
550
551    // step equations to the bounding box and snap to pixel center
552    const int32_t my = (miny << TRI_FRACTION_BITS) + TRI_HALF;
553    const int32_t mx = (minx << TRI_FRACTION_BITS) + TRI_HALF;
554    int32_t ey0 = dy01 * (x0 - mx) - dx01 * (y0 - my);
555    int32_t ey1 = dy12 * (x1 - mx) - dx12 * (y1 - my);
556    int32_t ey2 = dy20 * (x2 - mx) - dx20 * (y2 - my);
557
558    // right-exclusive fill rule, to avoid rare cases
559    // of over drawing
560    if (dy01<0 || (dy01 == 0 && dx01>0)) ey0++;
561    if (dy12<0 || (dy12 == 0 && dx12>0)) ey1++;
562    if (dy20<0 || (dy20 == 0 && dx20>0)) ey2++;
563
564    c->init_y(c, miny);
565    for (int32_t y = miny; y < maxy; y++) {
566        register int32_t ex0 = ey0;
567        register int32_t ex1 = ey1;
568        register int32_t ex2 = ey2;
569        register int32_t xl, xr;
570        for (xl=minx ; xl<maxx ; xl++) {
571            if (ex0>0 && ex1>0 && ex2>0)
572                break; // all strictly positive
573            ex0 -= dy01 << TRI_FRACTION_BITS;
574            ex1 -= dy12 << TRI_FRACTION_BITS;
575            ex2 -= dy20 << TRI_FRACTION_BITS;
576        }
577        xr = xl;
578        for ( ; xr<maxx ; xr++) {
579            if (!(ex0>0 && ex1>0 && ex2>0))
580                break; // not all strictly positive
581            ex0 -= dy01 << TRI_FRACTION_BITS;
582            ex1 -= dy12 << TRI_FRACTION_BITS;
583            ex2 -= dy20 << TRI_FRACTION_BITS;
584        }
585
586        if (xl < xr) {
587            c->iterators.xl = xl;
588            c->iterators.xr = xr;
589            c->scanline(c);
590        }
591        c->step_y(c);
592
593        ey0 += dx01 << TRI_FRACTION_BITS;
594        ey1 += dx12 << TRI_FRACTION_BITS;
595        ey2 += dx20 << TRI_FRACTION_BITS;
596    }
597}
598
599// ----------------------------------------------------------------------------
600#if 0
601#pragma mark -
602#endif
603
604// the following routine fills a triangle via edge stepping, which
605// unfortunately requires divisions in the setup phase to get right,
606// it should probably only be used for relatively large trianges
607
608
609// x = y*DX/DY    (ou DX and DY are constants, DY > 0, et y >= 0)
610//
611// for an equation of the type:
612//      x' = y*K/2^p     (with K and p constants "carefully chosen")
613//
614// We can now do a DDA without precision loss. We define 'e' by:
615//      x' - x = y*(DX/DY - K/2^p) = y*e
616//
617// If we choose K = round(DX*2^p/DY) then,
618//      abs(e) <= 1/2^(p+1) by construction
619//
620// therefore abs(x'-x) = y*abs(e) <= y/2^(p+1) <= DY/2^(p+1) <= DMAX/2^(p+1)
621//
622// which means that if DMAX <= 2^p, therefore abs(x-x') <= 1/2, including
623// at the last line. In fact, it's even a strict inequality except in one
624// extrem case (DY == DMAX et e = +/- 1/2)
625//
626// Applying that to our coordinates, we need 2^p >= 4096*16 = 65536
627// so p = 16 is enough, we're so lucky!
628
629const int TRI_ITERATORS_BITS = 16;
630
631struct Edge
632{
633  int32_t  x;      // edge position in 16.16 coordinates
634  int32_t  x_incr; // on each step, increment x by that amount
635  int32_t  y_top;  // starting scanline, 16.4 format
636  int32_t  y_bot;
637};
638
639static void
640edge_dump( Edge*  edge )
641{
642  LOGI( "  top=%d (%.3f)  bot=%d (%.3f)  x=%d (%.3f)  ix=%d (%.3f)",
643        edge->y_top, edge->y_top/float(TRI_ONE),
644		edge->y_bot, edge->y_bot/float(TRI_ONE),
645		edge->x, edge->x/float(FIXED_ONE),
646		edge->x_incr, edge->x_incr/float(FIXED_ONE) );
647}
648
649static void
650triangle_dump_edges( Edge*  edges,
651                     int            count )
652{
653    LOGI( "%d edge%s:\n", count, count == 1 ? "" : "s" );
654	for ( ; count > 0; count--, edges++ )
655	  edge_dump( edges );
656}
657
658// the following function sets up an edge, it assumes
659// that ymin and ymax are in already in the 'reduced'
660// format
661static __attribute__((noinline))
662void edge_setup(
663        Edge*           edges,
664        int*            pcount,
665        const GGLcoord* p1,
666        const GGLcoord* p2,
667        int32_t         ymin,
668        int32_t         ymax )
669{
670	const GGLfixed*  top = p1;
671	const GGLfixed*  bot = p2;
672	Edge*    edge = edges + *pcount;
673
674	if (top[1] > bot[1]) {
675        swap(top, bot);
676	}
677
678	int  y1 = top[1] | 1;
679	int  y2 = bot[1] | 1;
680	int  dy = y2 - y1;
681
682	if ( dy == 0 || y1 > ymax || y2 < ymin )
683		return;
684
685	if ( y1 > ymin )
686		ymin = TRI_SNAP_NEXT_HALF(y1);
687
688	if ( y2 < ymax )
689		ymax = TRI_SNAP_PREV_HALF(y2);
690
691	if ( ymin > ymax )  // when the edge doesn't cross any scanline
692	  return;
693
694	const int x1 = top[0];
695	const int dx = bot[0] - x1;
696    const int shift = TRI_ITERATORS_BITS - TRI_FRACTION_BITS;
697
698	// setup edge fields
699    // We add 0.5 to edge->x here because it simplifies the rounding
700    // in triangle_sweep_edges() -- this doesn't change the ordering of 'x'
701	edge->x      = (x1 << shift) + (1LU << (TRI_ITERATORS_BITS-1));
702	edge->x_incr = 0;
703	edge->y_top  = ymin;
704	edge->y_bot  = ymax;
705
706	if (ggl_likely(ymin <= ymax && dx)) {
707        edge->x_incr = gglDivQ16(dx, dy);
708    }
709    if (ggl_likely(y1 < ymin)) {
710        int32_t xadjust = (edge->x_incr * (ymin-y1)) >> TRI_FRACTION_BITS;
711        edge->x += xadjust;
712    }
713
714	++*pcount;
715}
716
717
718static void
719triangle_sweep_edges( Edge*  left,
720                      Edge*  right,
721					  int            ytop,
722					  int            ybot,
723					  context_t*     c )
724{
725    int count = ((ybot - ytop)>>TRI_FRACTION_BITS) + 1;
726    if (count<=0) return;
727
728    // sort the edges horizontally
729    if ((left->x > right->x) ||
730        ((left->x == right->x) && (left->x_incr > right->x_incr))) {
731        swap(left, right);
732    }
733
734    int left_x = left->x;
735    int right_x = right->x;
736    const int left_xi = left->x_incr;
737    const int right_xi  = right->x_incr;
738    left->x  += left_xi * count;
739    right->x += right_xi * count;
740
741	const int xmin = c->state.scissor.left;
742	const int xmax = c->state.scissor.right;
743    do {
744        // horizontal scissoring
745        const int32_t xl = max(left_x  >> TRI_ITERATORS_BITS, xmin);
746        const int32_t xr = min(right_x >> TRI_ITERATORS_BITS, xmax);
747        left_x  += left_xi;
748        right_x += right_xi;
749        // invoke the scanline rasterizer
750        if (ggl_likely(xl < xr)) {
751            c->iterators.xl = xl;
752            c->iterators.xr = xr;
753            c->scanline(c);
754        }
755		c->step_y(c);
756	} while (--count);
757}
758
759
760void trianglex_big(void* con,
761        const GGLcoord* v0, const GGLcoord* v1, const GGLcoord* v2)
762{
763    GGL_CONTEXT(c, con);
764
765    Edge edges[3];
766	int num_edges = 0;
767	int32_t ymin = TRI_FROM_INT(c->state.scissor.top)    + TRI_HALF;
768	int32_t ymax = TRI_FROM_INT(c->state.scissor.bottom) - TRI_HALF;
769
770	edge_setup( edges, &num_edges, v0, v1, ymin, ymax );
771	edge_setup( edges, &num_edges, v0, v2, ymin, ymax );
772	edge_setup( edges, &num_edges, v1, v2, ymin, ymax );
773
774    if (ggl_unlikely(num_edges<2))  // for really tiny triangles that don't
775		return;                     // cross any scanline centers
776
777    Edge* left  = &edges[0];
778    Edge* right = &edges[1];
779    Edge* other = &edges[2];
780    int32_t y_top = min(left->y_top, right->y_top);
781    int32_t y_bot = max(left->y_bot, right->y_bot);
782
783	if (ggl_likely(num_edges==3)) {
784        y_top = min(y_top, edges[2].y_top);
785        y_bot = max(y_bot, edges[2].y_bot);
786		if (edges[0].y_top > y_top) {
787            other = &edges[0];
788            left  = &edges[2];
789		} else if (edges[1].y_top > y_top) {
790            other = &edges[1];
791            right = &edges[2];
792		}
793    }
794
795    c->init_y(c, y_top >> TRI_FRACTION_BITS);
796
797    int32_t y_mid = min(left->y_bot, right->y_bot);
798    triangle_sweep_edges( left, right, y_top, y_mid, c );
799
800    // second scanline sweep loop, if necessary
801    y_mid += TRI_ONE;
802    if (y_mid <= y_bot) {
803        ((left->y_bot == y_bot) ? right : left) = other;
804        if (other->y_top < y_mid) {
805            other->x += other->x_incr;
806        }
807        triangle_sweep_edges( left, right, y_mid, y_bot, c );
808    }
809}
810
811void aa_trianglex(void* con,
812        const GGLcoord* a, const GGLcoord* b, const GGLcoord* c)
813{
814    GGLcoord pts[6] = { a[0], a[1], b[0], b[1], c[0], c[1] };
815    aapolyx(con, pts, 3);
816}
817
818// ----------------------------------------------------------------------------
819#if 0
820#pragma mark -
821#endif
822
823struct AAEdge
824{
825    GGLfixed x;         // edge position in 12.16 coordinates
826    GGLfixed x_incr;    // on each y step, increment x by that amount
827    GGLfixed y_incr;    // on each x step, increment y by that amount
828    int16_t y_top;      // starting scanline, 12.4 format
829    int16_t y_bot;      // starting scanline, 12.4 format
830    void dump();
831};
832
833void AAEdge::dump()
834{
835    float tri  = 1.0f / TRI_ONE;
836    float iter = 1.0f / (1<<TRI_ITERATORS_BITS);
837    float fix  = 1.0f / FIXED_ONE;
838    LOGD(   "x=%08x (%.3f), "
839            "x_incr=%08x (%.3f), y_incr=%08x (%.3f), "
840            "y_top=%08x (%.3f), y_bot=%08x (%.3f) ",
841        x, x*fix,
842        x_incr, x_incr*iter,
843        y_incr, y_incr*iter,
844        y_top, y_top*tri,
845        y_bot, y_bot*tri );
846}
847
848// the following function sets up an edge, it assumes
849// that ymin and ymax are in already in the 'reduced'
850// format
851static __attribute__((noinline))
852void aa_edge_setup(
853        AAEdge*         edges,
854        int*            pcount,
855        const GGLcoord* p1,
856        const GGLcoord* p2,
857        int32_t         ymin,
858        int32_t         ymax )
859{
860    const GGLfixed*  top = p1;
861    const GGLfixed*  bot = p2;
862    AAEdge* edge = edges + *pcount;
863
864    if (top[1] > bot[1])
865        swap(top, bot);
866
867    int  y1 = top[1];
868    int  y2 = bot[1];
869    int  dy = y2 - y1;
870
871    if (dy==0 || y1>ymax || y2<ymin)
872        return;
873
874    if (y1 > ymin)
875        ymin = y1;
876
877    if (y2 < ymax)
878        ymax = y2;
879
880    const int x1 = top[0];
881    const int dx = bot[0] - x1;
882    const int shift = FIXED_BITS - TRI_FRACTION_BITS;
883
884    // setup edge fields
885    edge->x      = x1 << shift;
886    edge->x_incr = 0;
887    edge->y_top  = ymin;
888    edge->y_bot  = ymax;
889    edge->y_incr = 0x7FFFFFFF;
890
891    if (ggl_likely(ymin <= ymax && dx)) {
892        edge->x_incr = gglDivQ16(dx, dy);
893        if (dx != 0) {
894            edge->y_incr = abs(gglDivQ16(dy, dx));
895        }
896    }
897    if (ggl_likely(y1 < ymin)) {
898        int32_t xadjust = (edge->x_incr * (ymin-y1))
899                >> (TRI_FRACTION_BITS + TRI_ITERATORS_BITS - FIXED_BITS);
900        edge->x += xadjust;
901    }
902
903    ++*pcount;
904}
905
906
907typedef int (*compar_t)(const void*, const void*);
908static int compare_edges(const AAEdge *e0, const AAEdge *e1) {
909    if (e0->y_top > e1->y_top)      return 1;
910    if (e0->y_top < e1->y_top)      return -1;
911    if (e0->x > e1->x)              return 1;
912    if (e0->x < e1->x)              return -1;
913    if (e0->x_incr > e1->x_incr)    return 1;
914    if (e0->x_incr < e1->x_incr)    return -1;
915    return 0; // same edges, should never happen
916}
917
918static inline
919void SET_COVERAGE(int16_t*& p, int32_t value, ssize_t n)
920{
921    android_memset16((uint16_t*)p, value, n*2);
922    p += n;
923}
924
925static inline
926void ADD_COVERAGE(int16_t*& p, int32_t value)
927{
928    value = *p + value;
929    if (value >= 0x8000)
930        value = 0x7FFF;
931    *p++ = value;
932}
933
934static inline
935void SUB_COVERAGE(int16_t*& p, int32_t value)
936{
937    value = *p - value;
938    value &= ~(value>>31);
939    *p++ = value;
940}
941
942void aapolyx(void* con,
943        const GGLcoord* pts, int count)
944{
945    /*
946     * NOTE: This routine assumes that the polygon has been clipped to the
947     * viewport already, that is, no vertex lies outside of the framebuffer.
948     * If this happens, the code below won't corrupt memory but the
949     * coverage values may not be correct.
950     */
951
952    GGL_CONTEXT(c, con);
953
954    // we do only quads for now (it's used for thick lines)
955    if ((count>4) || (count<2)) return;
956
957    // take scissor into account
958    const int xmin = c->state.scissor.left;
959    const int xmax = c->state.scissor.right;
960    if (xmin >= xmax) return;
961
962    // generate edges from the vertices
963    int32_t ymin = TRI_FROM_INT(c->state.scissor.top);
964    int32_t ymax = TRI_FROM_INT(c->state.scissor.bottom);
965    if (ymin >= ymax) return;
966
967    AAEdge edges[4];
968    int num_edges = 0;
969    GGLcoord const * p = pts;
970    for (int i=0 ; i<count-1 ; i++, p+=2) {
971        aa_edge_setup(edges, &num_edges, p, p+2, ymin, ymax);
972    }
973    aa_edge_setup(edges, &num_edges, p, pts, ymin, ymax );
974    if (ggl_unlikely(num_edges<2))
975        return;
976
977    // sort the edge list top to bottom, left to right.
978    qsort(edges, num_edges, sizeof(AAEdge), (compar_t)compare_edges);
979
980    int16_t* const covPtr = c->state.buffers.coverage;
981    memset(covPtr+xmin, 0, (xmax-xmin)*sizeof(*covPtr));
982
983    // now, sweep all edges in order
984    // start with the 2 first edges. We know that they share their top
985    // vertex, by construction.
986    int i = 2;
987    AAEdge* left  = &edges[0];
988    AAEdge* right = &edges[1];
989    int32_t yt = left->y_top;
990    GGLfixed l = left->x;
991    GGLfixed r = right->x;
992    int retire = 0;
993    int16_t* coverage;
994
995    // at this point we can initialize the rasterizer
996    c->init_y(c, yt>>TRI_FRACTION_BITS);
997    c->iterators.xl = xmax;
998    c->iterators.xr = xmin;
999
1000    do {
1001        int32_t y = min(min(left->y_bot, right->y_bot), TRI_FLOOR(yt + TRI_ONE));
1002        const int32_t shift = TRI_FRACTION_BITS + TRI_ITERATORS_BITS - FIXED_BITS;
1003        const int cf_shift = (1 + TRI_FRACTION_BITS*2 + TRI_ITERATORS_BITS - 15);
1004
1005        // compute xmin and xmax for the left edge
1006        GGLfixed l_min = gglMulAddx(left->x_incr, y - left->y_top, left->x, shift);
1007        GGLfixed l_max = l;
1008        l = l_min;
1009        if (l_min > l_max)
1010            swap(l_min, l_max);
1011
1012        // compute xmin and xmax for the right edge
1013        GGLfixed r_min = gglMulAddx(right->x_incr, y - right->y_top, right->x, shift);
1014        GGLfixed r_max = r;
1015        r = r_min;
1016        if (r_min > r_max)
1017            swap(r_min, r_max);
1018
1019        // make sure we're not touching coverage values outside of the
1020        // framebuffer
1021        l_min &= ~(l_min>>31);
1022        r_min &= ~(r_min>>31);
1023        l_max &= ~(l_max>>31);
1024        r_max &= ~(r_max>>31);
1025        if (gglFixedToIntFloor(l_min) >= xmax) l_min = gglIntToFixed(xmax)-1;
1026        if (gglFixedToIntFloor(r_min) >= xmax) r_min = gglIntToFixed(xmax)-1;
1027        if (gglFixedToIntCeil(l_max) >= xmax)  l_max = gglIntToFixed(xmax)-1;
1028        if (gglFixedToIntCeil(r_max) >= xmax)  r_max = gglIntToFixed(xmax)-1;
1029
1030        // compute the integer versions of the above
1031        const GGLfixed l_min_i = gglFloorx(l_min);
1032        const GGLfixed l_max_i = gglCeilx (l_max);
1033        const GGLfixed r_min_i = gglFloorx(r_min);
1034        const GGLfixed r_max_i = gglCeilx (r_max);
1035
1036        // clip horizontally using the scissor
1037        const int xml = max(xmin, gglFixedToIntFloor(l_min_i));
1038        const int xmr = min(xmax, gglFixedToIntFloor(r_max_i));
1039
1040        // if we just stepped to a new scanline, render the previous one.
1041        // and clear the coverage buffer
1042        if (retire) {
1043            if (c->iterators.xl < c->iterators.xr)
1044                c->scanline(c);
1045            c->step_y(c);
1046            memset(covPtr+xmin, 0, (xmax-xmin)*sizeof(*covPtr));
1047            c->iterators.xl = xml;
1048            c->iterators.xr = xmr;
1049        } else {
1050            // update the horizontal range of this scanline
1051            c->iterators.xl = min(c->iterators.xl, xml);
1052            c->iterators.xr = max(c->iterators.xr, xmr);
1053        }
1054
1055        coverage = covPtr + gglFixedToIntFloor(l_min_i);
1056        if (l_min_i == gglFloorx(l_max)) {
1057
1058            /*
1059             *  fully traverse this pixel vertically
1060             *       l_max
1061             *  +-----/--+  yt
1062             *  |    /   |
1063             *  |   /    |
1064             *  |  /     |
1065             *  +-/------+  y
1066             *   l_min  (l_min_i + TRI_ONE)
1067             */
1068
1069            GGLfixed dx = l_max - l_min;
1070            int32_t dy = y - yt;
1071            int cf = gglMulx((dx >> 1) + (l_min_i + FIXED_ONE - l_max), dy,
1072                FIXED_BITS + TRI_FRACTION_BITS - 15);
1073            ADD_COVERAGE(coverage, cf);
1074            // all pixels on the right have cf = 1.0
1075        } else {
1076            /*
1077             *  spans several pixels in one scanline
1078             *            l_max
1079             *  +--------+--/-----+  yt
1080             *  |        |/       |
1081             *  |       /|        |
1082             *  |     /  |        |
1083             *  +---/----+--------+  y
1084             *   l_min (l_min_i + TRI_ONE)
1085             */
1086
1087            // handle the first pixel separately...
1088            const int32_t y_incr = left->y_incr;
1089            int32_t dx = TRI_FROM_FIXED(l_min_i - l_min) + TRI_ONE;
1090            int32_t cf = (dx * dx * y_incr) >> cf_shift;
1091            ADD_COVERAGE(coverage, cf);
1092
1093            // following pixels get covered by y_incr, but we need
1094            // to fix-up the cf to account for previous partial pixel
1095            dx = TRI_FROM_FIXED(l_min - l_min_i);
1096            cf -= (dx * dx * y_incr) >> cf_shift;
1097            for (int x = l_min_i+FIXED_ONE ; x < l_max_i-FIXED_ONE ; x += FIXED_ONE) {
1098                cf += y_incr >> (TRI_ITERATORS_BITS-15);
1099                ADD_COVERAGE(coverage, cf);
1100            }
1101
1102            // and the last pixel
1103            dx = TRI_FROM_FIXED(l_max - l_max_i) - TRI_ONE;
1104            cf += (dx * dx * y_incr) >> cf_shift;
1105            ADD_COVERAGE(coverage, cf);
1106        }
1107
1108        // now, fill up all fully covered pixels
1109        coverage = covPtr + gglFixedToIntFloor(l_max_i);
1110        int cf = ((y - yt) << (15 - TRI_FRACTION_BITS));
1111        if (ggl_likely(cf >= 0x8000)) {
1112            SET_COVERAGE(coverage, 0x7FFF, ((r_max - l_max_i)>>FIXED_BITS)+1);
1113        } else {
1114            for (int x=l_max_i ; x<r_max ; x+=FIXED_ONE) {
1115                ADD_COVERAGE(coverage, cf);
1116            }
1117        }
1118
1119        // subtract the coverage of the right edge
1120        coverage = covPtr + gglFixedToIntFloor(r_min_i);
1121        if (r_min_i == gglFloorx(r_max)) {
1122            GGLfixed dx = r_max - r_min;
1123            int32_t dy = y - yt;
1124            int cf = gglMulx((dx >> 1) + (r_min_i + FIXED_ONE - r_max), dy,
1125                FIXED_BITS + TRI_FRACTION_BITS - 15);
1126            SUB_COVERAGE(coverage, cf);
1127            // all pixels on the right have cf = 1.0
1128        } else {
1129            // handle the first pixel separately...
1130            const int32_t y_incr = right->y_incr;
1131            int32_t dx = TRI_FROM_FIXED(r_min_i - r_min) + TRI_ONE;
1132            int32_t cf = (dx * dx * y_incr) >> cf_shift;
1133            SUB_COVERAGE(coverage, cf);
1134
1135            // following pixels get covered by y_incr, but we need
1136            // to fix-up the cf to account for previous partial pixel
1137            dx = TRI_FROM_FIXED(r_min - r_min_i);
1138            cf -= (dx * dx * y_incr) >> cf_shift;
1139            for (int x = r_min_i+FIXED_ONE ; x < r_max_i-FIXED_ONE ; x += FIXED_ONE) {
1140                cf += y_incr >> (TRI_ITERATORS_BITS-15);
1141                SUB_COVERAGE(coverage, cf);
1142            }
1143
1144            // and the last pixel
1145            dx = TRI_FROM_FIXED(r_max - r_max_i) - TRI_ONE;
1146            cf += (dx * dx * y_incr) >> cf_shift;
1147            SUB_COVERAGE(coverage, cf);
1148        }
1149
1150        // did we reach the end of an edge? if so, get a new one.
1151        if (y == left->y_bot || y == right->y_bot) {
1152            // bail out if we're done
1153            if (i>=num_edges)
1154                break;
1155            if (y == left->y_bot)
1156                left = &edges[i++];
1157            if (y == right->y_bot)
1158                right = &edges[i++];
1159        }
1160
1161        // next scanline
1162        yt = y;
1163
1164        // did we just finish a scanline?
1165        retire = (y << (32-TRI_FRACTION_BITS)) == 0;
1166    } while (true);
1167
1168    // render the last scanline
1169    if (c->iterators.xl < c->iterators.xr)
1170        c->scanline(c);
1171}
1172
1173}; // namespace android
1174