GGLAssembler.cpp revision dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0
1/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#define LOG_TAG "GGLAssembler"
19
20#include <assert.h>
21#include <stdint.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <sys/types.h>
25#include <cutils/log.h>
26
27#include "codeflinger/GGLAssembler.h"
28
29namespace android {
30
31// ----------------------------------------------------------------------------
32
// Constructs a scanline-pipeline code generator that emits through the
// given ARM assembler back-end. Starts at the highest optimization
// level (7); scanline() lowers it on register-allocation failure.
GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
{
}
37
// Nothing to release here; the proxied assembler is owned elsewhere.
GGLAssembler::~GGLAssembler()
{
}
41
// Emits the function prologue by forwarding to the underlying assembler.
void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}
46
// Emits the function epilogue; 'touched' is the set of registers that
// were used and must be restored (see registerFile().touched()).
void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}
51
// Resets both the instruction stream and the register allocator so a
// pipeline can be regenerated from scratch at a new optimization level.
void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}
58
59// ---------------------------------------------------------------------------
60
61int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
62{
63    int err = 0;
64    int opt_level = mOptLevel;
65    while (opt_level >= 0) {
66        reset(opt_level);
67        err = scanline_core(needs, c);
68        if (err == 0)
69            break;
70        opt_level--;
71    }
72
73    // XXX: in theory, pcForLabel is not valid before generate()
74    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
75    uint32_t* fragment_end_pc = pcForLabel("epilog");
76    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
77
78    // build a name for our pipeline
79    char name[64];
80    sprintf(name,
81            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
82            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
83
84    if (err) {
85        LOGE("Error while generating ""%s""\n", name);
86        disassemble(name);
87        return -1;
88    }
89
90    return generate(name);
91}
92
// Core of the pipeline generator. Decodes the 'needs' bitfields into
// member state (blending factors, per-component info, masking, etc.),
// then emits the whole per-scanline fragment loop: dither update, early
// depth test, texturing, per-component blending, logic ops, masking,
// store, and iterator updates. Returns the register allocator's status
// (non-zero means it ran out of registers; the caller retries at a
// lower optimization level).
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    // NOTE(review): 'duration' is assigned but never read in this
    // function — looks like leftover timing instrumentation.
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    // Decode the fixed-function state packed in the needs bitfields.
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    // If the color buffer has no alpha channel, destination alpha reads
    // as 1.0, so DST_ALPHA-based factors collapse to GGL_ONE.
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending =    blending_codes(mBlendSrc, mBlendDst) |
                            blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    // Compute per-component (A/R/G/B) generation info: whether each
    // component is masked, lands in the destination, must be iterated,
    // smooth-shaded, fogged or blended.
    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        // SRC_ALPHA_SATURATE applied to the alpha channel itself is 1.
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked =   !!(masking & mask);
        info.inDest =   !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed =   (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth =   mSmooth && info.iterated;
        info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    // If every framebuffer component is masked, nothing is written, so
    // dithering is pointless.
    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index (kept in the low bits of the
            // packed count register; see build_scanline_prolog).
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // look up the dither threshold for this pixel in the
                // context's dither matrix
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            build_component(pixel, parts, GGLFormat::ALPHA,    regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED,      regs);
            build_component(pixel, parts, GGLFormat::GREEN,    regs);
            build_component(pixel, parts, GGLFormat::BLUE,     regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    // Pixel count lives in the top 16 bits; decrement and loop while
    // the result stays non-negative.
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    // Discard paths: fragments rejected by the depth or alpha test skip
    // the store but must still advance all iterators before looping.
    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}
345
346// ---------------------------------------------------------------------------
347
// Emits the per-scanline setup code: computes the pixel count, the
// color-buffer pointer, the initial fog and Z values, the texture
// coordinates, the iterated color and the coverage pointer. Fills in
// 'parts' with the registers holding these values.
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    // Rctx is referenced implicitly by the CONTEXT_LOAD/STORE macros.
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // Pack the dither matrix index alongside the count:
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog: f = x*dfdx + (iterated y term), stored in the context
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate (only when a depth test or depth write is needed)
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx,  generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase of parts.count
        // zbase = base + (xl-count + stride*y)*2
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing):
    // coverage values are 16-bit, hence the x*2 offset
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}
449
450// ---------------------------------------------------------------------------
451
452void GGLAssembler::build_component( pixel_t& pixel,
453                                    const fragment_parts_t& parts,
454                                    int component,
455                                    Scratch& regs)
456{
457    static char const * comments[] = {"alpha", "red", "green", "blue"};
458    comment(comments[component]);
459
460    // local register file
461    Scratch scratches(registerFile());
462    const int dst_component_size = pixel.component_size(component);
463
464    component_t temp(-1);
465    build_incoming_component( temp, dst_component_size,
466            parts, component, scratches, regs);
467
468    if (mInfo[component].inDest) {
469
470        // blending...
471        build_blending( temp, mDstPixel, component, scratches );
472
473        // downshift component and rebuild pixel...
474        downshift(pixel, component, temp, parts.dither);
475    }
476}
477
// Produces in 'temp' the incoming value of one component, either by
// fully extracting/combining it (iterated color, texture environment,
// expansion, coverage, alpha-test, fog) or — when no processing is
// required — by simply aliasing the iterated or texel component.
// dst_size is the bit-width of this component in the color buffer.
void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    // SRC_ALPHA_SATURATE applied to alpha itself is identically 1
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                        (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        // alpha may be needed later by the blending stage, so its
        // register must come from the longer-lived register pool
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    // reuse the fragment register in place, but pin it
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            // later texture units override earlier ones
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
613
614bool GGLAssembler::isAlphaSourceNeeded() const
615{
616    // XXX: also needed for alpha-test
617    const int bs = mBlendSrc;
618    const int bd = mBlendDst;
619    return  bs==GGL_SRC_ALPHA_SATURATE ||
620            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
621            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
622}
623
624// ---------------------------------------------------------------------------
625
626void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
627{
628    if (mSmooth && !parts.iterated_packed) {
629        // update the iterated color in a pipelined way...
630        comment("update iterated color");
631        Scratch scratches(registerFile());
632
633        const int reload = parts.reload;
634        for (int i=0 ; i<4 ; i++) {
635            if (!mInfo[i].iterated)
636                continue;
637
638            int c = parts.argb[i].reg;
639            int dx = parts.argb_dx[i].reg;
640
641            if (reload & 1) {
642                c = scratches.obtain();
643                CONTEXT_LOAD(c, generated_vars.argb[i].c);
644            }
645            if (reload & 2) {
646                dx = scratches.obtain();
647                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
648            }
649
650            if (mSmooth) {
651                ADD(AL, 0, c, c, dx);
652            }
653
654            if (reload & 1) {
655                CONTEXT_STORE(c, generated_vars.argb[i].c);
656                scratches.recycle(c);
657            }
658            if (reload & 2) {
659                scratches.recycle(dx);
660            }
661        }
662    }
663}
664
665// ---------------------------------------------------------------------------
666
667void GGLAssembler::build_coverage_application(component_t& fragment,
668        const fragment_parts_t& parts, Scratch& regs)
669{
670    // here fragment.l is guarenteed to be 0
671    if (mAA) {
672        // coverages are 1.15 fixed-point numbers
673        comment("coverage application");
674
675        component_t incoming(fragment);
676        modify(fragment, regs);
677
678        Scratch scratches(registerFile());
679        int cf = scratches.obtain();
680        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
681        if (fragment.h > 31) {
682            fragment.h--;
683            SMULWB(AL, fragment.reg, incoming.reg, cf);
684        } else {
685            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
686            SMULWB(AL, fragment.reg, fragment.reg, cf);
687        }
688    }
689}
690
691// ---------------------------------------------------------------------------
692
693void GGLAssembler::build_alpha_test(component_t& fragment,
694                                    const fragment_parts_t& parts)
695{
696    if (mAlphaTest != GGL_ALWAYS) {
697        comment("Alpha Test");
698        Scratch scratches(registerFile());
699        int ref = scratches.obtain();
700        const int shift = GGL_COLOR_BITS-fragment.size();
701        CONTEXT_LOAD(ref, state.alpha_test.ref);
702        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
703        else       CMP(AL, fragment.reg, ref);
704        int cc = NV;
705        switch (mAlphaTest) {
706        case GGL_NEVER:     cc = NV;    break;
707        case GGL_LESS:      cc = LT;    break;
708        case GGL_EQUAL:     cc = EQ;    break;
709        case GGL_LEQUAL:    cc = LS;    break;
710        case GGL_GREATER:   cc = HI;    break;
711        case GGL_NOTEQUAL:  cc = NE;    break;
712        case GGL_GEQUAL:    cc = HS;    break;
713        }
714        B(cc^1, "discard_after_textures");
715    }
716}
717
718// ---------------------------------------------------------------------------
719
// Emits the depth test and/or depth write, controlled by 'mask'
// (Z_TEST, Z_WRITE). The depth buffer entry for the current pixel is
// addressed from generated_vars.zbase using the packed pixel count.
void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        // ic = condition under which the test PASSES (note the CMP
        // below compares buffer depth against the incoming z, hence
        // e.g. GGL_LESS maps to HI).
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // inverse the condition (cc = branch-on-failure)
        cc = ic^1;

        // no depth-write requested by the state, drop the Z_WRITE bit
        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase + ((count >> 16) << 1)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            // conditional store: only write depth when the test passed
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}
784
785void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
786{
787    const needs_t& needs = mBuilderContext.needs;
788    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
789        Scratch scratches(registerFile());
790        int dzdx = scratches.obtain();
791        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
792        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
793    }
794}
795
796void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
797{
798    const needs_t& needs = mBuilderContext.needs;
799    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
800        Scratch scratches(registerFile());
801        int dfdx = scratches.obtain();
802        int f = scratches.obtain();
803        CONTEXT_LOAD(f,     generated_vars.f);
804        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);   // stall
805        ADD(AL, 0, f, f, dfdx);
806        CONTEXT_STORE(f,    generated_vars.f);
807    }
808}
809
810// ---------------------------------------------------------------------------
811
// Emits the GL logic operation combining the source pixel with the
// previously-fetched destination pixel (mDstPixel). GGL_COPY is a
// no-op, so nothing is emitted for it.
void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    // NOTE(review): assumes GGL_CLEAR is the zero base of the logic-op
    // enum, so OR-ing it in is an identity offset — confirm in ggl defs.
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    // if the incoming pixel register may not be clobbered, move the
    // result into a fresh scratch register instead
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    // Each case computes pixel = op(s, d); ops without a single ARM
    // instruction are built as op + MVN (De Morgan forms, see comments).
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_COPY:                                                  break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    };
}
854
855// ---------------------------------------------------------------------------
856
// Return the bit position (always a multiple of 2) of the lowest
// even-aligned pair of bits in 'val' that contains a set bit.
// Precondition: such a pair exists (val has a set bit), otherwise
// this loops forever.
static uint32_t find_bottom(uint32_t val)
{
    uint32_t pos;
    for (pos = 0; (val & (3u << pos)) == 0; pos += 2) {
        // keep scanning upward, two bits at a time
    }
    return pos;
}
864
// Rotate 'val' right, two bits at a time, until its lowest pair of bits
// contains a set bit and its top six bits are clear. 'rot' receives the
// rotation that was applied; if a full 32-bit rotation occurs (e.g.
// val == 0), the loop stops with rot reset to 0 and val back to its
// original value.
static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    while (((val & 3) == 0) || (val & 0xFC000000)) {
        // rotate right by 2 (the bits shifted out wrap to the top)
        val = (val >> 2) | (val << 30);
        rot += 2;
        if (rot == 32) {
            rot = 0;
            break;
        }
    }
}
880
881void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
882{
883    uint32_t rot;
884    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
885    mask &= size;
886
887    if (mask == size) {
888        if (d != s)
889            MOV( AL, 0, d, s);
890        return;
891    }
892
893    int negative_logic = !isValidImmediate(mask);
894    if (negative_logic) {
895        mask = ~mask & size;
896    }
897    normalize(mask, rot);
898
899    if (mask) {
900        while (mask) {
901            uint32_t bitpos = find_bottom(mask);
902            int shift = rot + bitpos;
903            uint32_t m = mask & (0xff << bitpos);
904            mask &= ~m;
905            m >>= bitpos;
906            int32_t newMask =  (m<<shift) | (m>>(32-shift));
907            if (!negative_logic) {
908                AND( AL, 0, d, s, imm(newMask) );
909            } else {
910                BIC( AL, 0, d, s, imm(newMask) );
911            }
912            s = d;
913        }
914    } else {
915        MOV( AL, 0, d, imm(0));
916    }
917}
918
// Apply the color write-mask: combine the source pixel with the
// destination pixel (mDstPixel) so that only the unmasked color
// components are overwritten in the framebuffer. No-op when masking is
// disabled, or when everything is masked (handled elsewhere —
// presumably by skipping the write entirely; confirm in caller).
void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    if (!mMasking || mAllMasked) {
        return;
    }

    comment("color mask");

    pixel_t fb(mDstPixel);
    // keep a handle on the original source register, then make sure the
    // output register is one we are allowed to clobber
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    // build the bitmask of the writable (unmasked) components, using
    // the destination format's per-component bit ranges [l, h)
    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked component of source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non masked components of destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // or back the channels that were masked
    // (mind the register aliasing cases: the OR may degenerate into a
    // MOV or even a no-op)
    if (s.reg == fb.reg) {
         // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}
973
974// ---------------------------------------------------------------------------
975
976void GGLAssembler::base_offset(
977        const pointer_t& d, const pointer_t& b, const reg_t& o)
978{
979    switch (b.size) {
980    case 32:
981        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
982        break;
983    case 24:
984        if (d.reg == b.reg) {
985            ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
986            ADD(AL, 0, d.reg, d.reg, o.reg);
987        } else {
988            ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
989            ADD(AL, 0, d.reg, d.reg, b.reg);
990        }
991        break;
992    case 16:
993        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
994        break;
995    case 8:
996        ADD(AL, 0, d.reg, b.reg, o.reg);
997        break;
998    }
999}
1000
1001// ----------------------------------------------------------------------------
1002// cheezy register allocator...
1003// ----------------------------------------------------------------------------
1004
// Release every allocated register and clear the touched/status state
// (SP and PC are immediately re-reserved by RegisterFile::reset()).
void RegisterAllocator::reset()
{
    mRegs.reset();
}
1009
// Mark 'reg' as allocated and return it; fatal if it is already in use.
int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}
1014
// Allocate and return a free register (returns SP when none are left —
// see RegisterFile::obtain()).
int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}
1019
// Return 'reg' to the pool of free registers.
void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}
1024
// Direct access to the underlying register file.
RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}
1029
1030// ----------------------------------------------------------------------------
1031
// A fresh register file starts with nothing allocated, except SP and
// PC which must never be handed out by the allocator.
RegisterAllocator::RegisterFile::RegisterFile()
    : mRegs(0), mTouched(0), mStatus(0)
{
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}
1038
1039RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
1040    : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
1041{
1042}
1043
RegisterAllocator::RegisterFile::~RegisterFile()
{
    // nothing to release: all state is plain integer bitmasks
}
1047
// Two register files compare equal when the same set of registers is
// currently allocated; mTouched and mStatus are not part of the
// comparison.
bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}
1052
1053void RegisterAllocator::RegisterFile::reset()
1054{
1055    mRegs = mTouched = mStatus = 0;
1056    reserve(ARMAssemblerInterface::SP);
1057    reserve(ARMAssemblerInterface::PC);
1058}
1059
// Mark 'reg' as allocated and record it in the touched set; returns
// 'reg'. It is a fatal error to reserve a register already in use.
int RegisterAllocator::RegisterFile::reserve(int reg)
{
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    mTouched |= mRegs;  // folds every currently-held register into the touched set
    return reg;
}
1069
// Reserve every register in 'regMask' at once. Unlike reserve(), this
// does not check whether any of them are already allocated.
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}
1075
// Non-zero when 'reg' is currently allocated; fatal for reg >= 16
// (only ARM core registers r0-r15 are tracked).
int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
    return mRegs & (1<<reg);
}
1081
1082int RegisterAllocator::RegisterFile::obtain()
1083{
1084    const char priorityList[14] = {  0,  1, 2, 3,
1085                                    12, 14, 4, 5,
1086                                     6,  7, 8, 9,
1087                                    10, 11 };
1088    const int nbreg = sizeof(priorityList);
1089    int i, r;
1090    for (i=0 ; i<nbreg ; i++) {
1091        r = priorityList[i];
1092        if (!isUsed(r)) {
1093            break;
1094        }
1095    }
1096    // this is not an error anymore because, we'll try again with
1097    // a lower optimization level.
1098    //LOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1099    if (i >= nbreg) {
1100        mStatus |= OUT_OF_REGISTERS;
1101        // we return SP so we can more easily debug things
1102        // the code will never be run anyway.
1103        return ARMAssemblerInterface::SP;
1104    }
1105    reserve(r);
1106    return r;
1107}
1108
1109bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1110{
1111    return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
1112}
1113
1114int RegisterAllocator::RegisterFile::countFreeRegs() const
1115{
1116    int f = ~mRegs & 0xFFFF;
1117    // now count number of 1
1118   f = (f & 0x5555) + ((f>>1) & 0x5555);
1119   f = (f & 0x3333) + ((f>>2) & 0x3333);
1120   f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1121   f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1122   return f;
1123}
1124
// Return 'reg' to the free pool; fatal (in debug builds) if it was not
// allocated. The touched set deliberately keeps its bit.
void RegisterAllocator::RegisterFile::recycle(int reg)
{
    LOG_FATAL_IF(!isUsed(reg),
            "recycling unallocated register %d",
            reg);
    mRegs &= ~(1<<reg);
}
1132
// Return every register in 'regMask' to the free pool; fatal (in debug
// builds) if any of them was not allocated.
void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    LOG_FATAL_IF((mRegs & regMask)!=regMask,
            "recycling unallocated registers "
            "(recycle=%08x, allocated=%08x, unallocated=%08x)",
            regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}
1141
// Bitmask of every register that has been allocated at any point since
// the last reset() — presumably used by the caller to decide which
// registers the generated code must save/restore; confirm at call site.
uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}
1146
1147// ----------------------------------------------------------------------------
1148
1149}; // namespace android
1150
1151