GGLAssembler.cpp revision 2bc2b792782b304b15d8c48b54916a9b3fa3a7ac
1/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#define LOG_TAG "GGLAssembler"
19
20#include <assert.h>
21#include <stdint.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <sys/types.h>
25#include <cutils/log.h>
26
27#include "codeflinger/GGLAssembler.h"
28
29namespace android {
30
31// ----------------------------------------------------------------------------
32
// Creates a scanline code generator that forwards emitted instructions to
// `target` through ARMAssemblerProxy. The register allocator is set up for
// the architecture reported by getCodegenArch(), and the optimization level
// starts at 7 (scanline() lowers it when code generation fails).
GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target),
      RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
{
}
38
// The destructor body is empty; no cleanup is performed here.
GGLAssembler::~GGLAssembler()
{
}
42
// Emits the function prolog by forwarding to the proxied assembler.
void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}
47
// Emits the function epilog by forwarding to the proxied assembler.
// `touched` is passed through unchanged; callers in this file supply
// registerFile().touched().
void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}
52
// Resets both base classes (the proxied assembler and the register
// allocator) and records `opt_level` as the current optimization level.
void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}
59
60// ---------------------------------------------------------------------------
61
62int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
63{
64    int err = 0;
65    int opt_level = mOptLevel;
66    while (opt_level >= 0) {
67        reset(opt_level);
68        err = scanline_core(needs, c);
69        if (err == 0)
70            break;
71        opt_level--;
72    }
73
74    // XXX: in theory, pcForLabel is not valid before generate()
75    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
76    uint32_t* fragment_end_pc = pcForLabel("epilog");
77    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
78
79    // build a name for our pipeline
80    char name[64];
81    sprintf(name,
82            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
83            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
84
85    if (err) {
86        ALOGE("Error while generating ""%s""\n", name);
87        disassemble(name);
88        return -1;
89    }
90
91    return generate(name);
92}
93
// Emits the complete scanline routine for `needs`:
//   1. decodes the needs into member state (blend factors, tests, masking,
//      per-component info),
//   2. emits the prolog and the per-scanline setup,
//   3. emits the per-fragment loop ("fragment_loop" ... "epilog"),
//   4. when an alpha or depth test can reject a fragment, emits the
//      "discard_*" paths that only advance the iterators.
//
// Returns registerFile().status(); the early returns below propagate a
// non-zero status as soon as it is observed, which lets scanline() retry.
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    // NOTE(review): `duration` is not read anywhere in this function.
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    // the test functions are stored in the needs as offsets from GGL_NEVER
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    // The color buffer has no alpha channel: every blend factor that refers
    // to destination alpha is replaced by GGL_ONE.
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    // (`blending` combines the codes of the color and alpha factor pairs; it
    // is tested further down to decide whether the destination is fetched)
    const int blending =    blending_codes(mBlendSrc, mBlendDst) |
                            blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    // (both branches below are empty placeholders)
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    // Fill in the per-component info (mInfo[0..3]) and accumulate the
    // blending / masking / present-in-framebuffer bit masks.
    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        // for the alpha component SRC_ALPHA_SATURATE is treated as ONE
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked =   !!(masking & mask);
        // written to the destination: not masked, present in the format,
        // and the logic op (if any) uses the source
        info.inDest =   !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        // green and blue are never written for formats whose `components`
        // value is GGL_LUMINANCE or above
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        // only alpha can be "needed" without being written: for blending
        // with source alpha, or for the alpha test
        info.needed =   (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth =   mSmooth && info.iterated;
        info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    // every component present in the color buffer is masked out: nothing is
    // stored, so dithering is switched off
    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
            // (rotate the counter, add 1 at bit position
            // 32-GGL_DITHER_ORDER_SHIFT, then rotate back)
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
            if (registerFile().status())
                return registerFile().status();
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            // (directTex is used as a 1-based texture unit index)
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // fetch the dither value: a byte of the context's
                // ditherMatrix indexed by (count & (GGL_DITHER_SIZE-1))
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            // alpha is built first: build_incoming_component() emits the
            // alpha test as part of the alpha component
            build_component(pixel, parts, GGLFormat::ALPHA,    regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED,      regs);
            build_component(pixel, parts, GGLFormat::GREEN,    regs);
            build_component(pixel, parts, GGLFormat::BLUE,     regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    // decrement the pixel counter kept in the upper 16 bits of count.reg
    // and loop while the result is not negative
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    // Discard paths: targets of the branches emitted by the depth test
    // ("discard_before_textures") and the alpha test
    // ("discard_after_textures"). They skip the store but still advance the
    // iterators and the color-buffer pointer before looping.
    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            // advance the color-buffer pointer by one pixel
            // (cbPtr.size is in bits, hence the >>3)
            ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}
348
349// ---------------------------------------------------------------------------
350
// Emits the per-scanline setup that runs once before the fragment loop and
// fills in `parts` with the registers holding the loop state:
//   - parts.count  : pixel counter (and dither offset when dithering)
//   - parts.cbPtr  : color-buffer pointer (unless everything is masked)
//   - parts.z      : iterated depth (when depth test or z-mask is active)
//   - parts.covPtr : coverage pointer (when anti-aliasing)
// It also stores the initial fog coordinate and the depth-buffer base
// address into the context's generated_vars, and initializes texture
// coordinates and iterated colors.
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    // (then minus one, so the loop can run while the counter is >= 0)
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        // (this inner Scratch shadows the function-level `scratches`)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        // the result reuses the register that holds ydfdy
        int f = ydfdy;
        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        // f = Rx*dfdx + ydfdy
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        // ydzdy is loaded straight into the register that will hold z
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx,  generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        // z = Rx*dzdx + ydzdy
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase of parts.count
        // zbase = base + (xl + (count.reg>>16) + stride*y)*2
        // (the instructions below ADD the counter; build_depth_test()
        // subtracts the remaining count again to address the current pixel)
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        // covPtr = coverage buffer + Rx*2 (declared as a 16-bit pointer)
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}
452
453// ---------------------------------------------------------------------------
454
455void GGLAssembler::build_component( pixel_t& pixel,
456                                    const fragment_parts_t& parts,
457                                    int component,
458                                    Scratch& regs)
459{
460    static char const * comments[] = {"alpha", "red", "green", "blue"};
461    comment(comments[component]);
462
463    // local register file
464    Scratch scratches(registerFile());
465    const int dst_component_size = pixel.component_size(component);
466
467    component_t temp(-1);
468    build_incoming_component( temp, dst_component_size,
469            parts, component, scratches, regs);
470
471    if (mInfo[component].inDest) {
472
473        // blending...
474        build_blending( temp, mDstPixel, component, scratches );
475
476        // downshift component and rebuild pixel...
477        downshift(pixel, component, temp, parts.dither);
478    }
479}
480
// Produces the incoming (source) value of `component` in `temp`.
//
// Two paths exist:
//  - "extract": the component is computed into its own register (iterated
//    color, texture environment, optional expansion, and for alpha the
//    coverage application, alpha test and blending alpha source, then fog);
//  - otherwise, when the component is written to the destination, `temp`
//    simply designates the component inside the iterated color or a texel.
// If neither applies `temp` is left untouched.
//
//   dst_size    - size in bits of the component in the destination pixel
//   scratches   - register scope local to the component being built
//   global_regs - longer-lived scope, used when the alpha value must be
//                 kept as the blending alpha source
void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    // NOTE(review): this loop stops at GGL_TEXTURE_UNIT_COUNT-1 and so never
    // looks at the last texture unit, whereas the loop at the end of this
    // function covers all units - confirm this is intended.
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                        (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        // an alpha value needed later for blending must live in the
        // caller's longer-lived register scope
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                // shift the value down so that its low bit is bit 0
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    // take over the fragment's register as the alpha source
                    // and protect it from further modification
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                // account for the bits dropped by the shift
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            // a texture unit that provides this component without replacing
            // it overrides the choice above; the last matching unit wins
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
616
617bool GGLAssembler::isAlphaSourceNeeded() const
618{
619    // XXX: also needed for alpha-test
620    const int bs = mBlendSrc;
621    const int bd = mBlendDst;
622    return  bs==GGL_SRC_ALPHA_SATURATE ||
623            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
624            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
625}
626
627// ---------------------------------------------------------------------------
628
629void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
630{
631    if (mSmooth && !parts.iterated_packed) {
632        // update the iterated color in a pipelined way...
633        comment("update iterated color");
634        Scratch scratches(registerFile());
635
636        const int reload = parts.reload;
637        for (int i=0 ; i<4 ; i++) {
638            if (!mInfo[i].iterated)
639                continue;
640
641            int c = parts.argb[i].reg;
642            int dx = parts.argb_dx[i].reg;
643
644            if (reload & 1) {
645                c = scratches.obtain();
646                CONTEXT_LOAD(c, generated_vars.argb[i].c);
647            }
648            if (reload & 2) {
649                dx = scratches.obtain();
650                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
651            }
652
653            if (mSmooth) {
654                ADD(AL, 0, c, c, dx);
655            }
656
657            if (reload & 1) {
658                CONTEXT_STORE(c, generated_vars.argb[i].c);
659                scratches.recycle(c);
660            }
661            if (reload & 2) {
662                scratches.recycle(dx);
663            }
664        }
665    }
666}
667
668// ---------------------------------------------------------------------------
669
670void GGLAssembler::build_coverage_application(component_t& fragment,
671        const fragment_parts_t& parts, Scratch& regs)
672{
673    // here fragment.l is guarenteed to be 0
674    if (mAA) {
675        // coverages are 1.15 fixed-point numbers
676        comment("coverage application");
677
678        component_t incoming(fragment);
679        modify(fragment, regs);
680
681        Scratch scratches(registerFile());
682        int cf = scratches.obtain();
683        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
684        if (fragment.h > 31) {
685            fragment.h--;
686            SMULWB(AL, fragment.reg, incoming.reg, cf);
687        } else {
688            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
689            SMULWB(AL, fragment.reg, fragment.reg, cf);
690        }
691    }
692}
693
694// ---------------------------------------------------------------------------
695
696void GGLAssembler::build_alpha_test(component_t& fragment,
697                                    const fragment_parts_t& parts)
698{
699    if (mAlphaTest != GGL_ALWAYS) {
700        comment("Alpha Test");
701        Scratch scratches(registerFile());
702        int ref = scratches.obtain();
703        const int shift = GGL_COLOR_BITS-fragment.size();
704        CONTEXT_LOAD(ref, state.alpha_test.ref);
705        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
706        else       CMP(AL, fragment.reg, ref);
707        int cc = NV;
708        switch (mAlphaTest) {
709        case GGL_NEVER:     cc = NV;    break;
710        case GGL_LESS:      cc = LT;    break;
711        case GGL_EQUAL:     cc = EQ;    break;
712        case GGL_LEQUAL:    cc = LS;    break;
713        case GGL_GREATER:   cc = HI;    break;
714        case GGL_NOTEQUAL:  cc = NE;    break;
715        case GGL_GEQUAL:    cc = HS;    break;
716        }
717        B(cc^1, "discard_after_textures");
718    }
719}
720
721// ---------------------------------------------------------------------------
722
// Emits the depth test and/or the depth write for the current fragment.
//
//   mask - combination of Z_TEST and Z_WRITE selecting what to emit; other
//          bits are ignored.
//
// Nothing is emitted when the depth function is GGL_ALWAYS and the z-mask
// need is not set. A failing test branches to "discard_before_textures".
void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        // ic: condition under which the fragment passes, expressed for the
        //     "CMP depth, z" emitted below (stored depth first, new z
        //     second) - hence GGL_LESS maps to HI, and so on.
        // cc: the opposite condition, used to branch to the discard path.
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // inverse the condition
        cc = ic^1;

        // no depth write is emitted unless the z-mask need is set
        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase - ((count >> 16) << 1)
            // (a single LSR #15 is used; this matches the formula as long
            // as bit 15 of count.reg is clear)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            // compare the stored depth with the upper 16 bits of z
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                // (no compare was emitted, so the store is unconditional)
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            // the store is conditional on the "pass" condition
            STRH(ic, depth, zbase);
        }
    }
}
787
788void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
789{
790    const needs_t& needs = mBuilderContext.needs;
791    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
792        Scratch scratches(registerFile());
793        int dzdx = scratches.obtain();
794        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
795        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
796    }
797}
798
799void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
800{
801    const needs_t& needs = mBuilderContext.needs;
802    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
803        Scratch scratches(registerFile());
804        int dfdx = scratches.obtain();
805        int f = scratches.obtain();
806        CONTEXT_LOAD(f,     generated_vars.f);
807        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);   // stall
808        ADD(AL, 0, f, f, dfdx);
809        CONTEXT_STORE(f,    generated_vars.f);
810    }
811}
812
813// ---------------------------------------------------------------------------
814
815void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
816{
817    const needs_t& needs = mBuilderContext.needs;
818    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
819    if (opcode == GGL_COPY)
820        return;
821
822    comment("logic operation");
823
824    pixel_t s(pixel);
825    if (!(pixel.flags & CORRUPTIBLE)) {
826        pixel.reg = regs.obtain();
827        pixel.flags |= CORRUPTIBLE;
828    }
829
830    pixel_t d(mDstPixel);
831    switch(opcode) {
832    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
833    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
834    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
835    case GGL_COPY:                                                  break;
836    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
837    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
838    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
839    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
840    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
841                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
842    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
843                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
844    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
845    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
846                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
847                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
848    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
849    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
850                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
851                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
852    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
853                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
854    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
855    };
856}
857
858// ---------------------------------------------------------------------------
859
// Returns the (even) bit position of the lowest 2-bit pair of 'val' that
// contains a set bit, i.e. 0, 2, 4, ... 30.
//
// The probe uses an unsigned constant so that testing the top pair (3u << 30)
// does not overflow a signed int, and the scan is bounded to the 32-bit word:
// when 'val' is zero there is no such pair and 0 is returned, so the result
// is always a valid shift amount. Callers that must distinguish "no bit set"
// have to test 'val' themselves.
static uint32_t find_bottom(uint32_t val)
{
    for (uint32_t i = 0; i < 32; i += 2) {
        if (val & (3u << i))
            return i;
    }
    return 0;
}
867
// Rotates 'val' right, two bits at a time, until its lowest 2-bit pair is
// non-zero and its top six bits are all zero. On return 'rot' holds the
// number of bits 'val' was rotated by. If no rotation satisfies the
// condition, 'val' ends up back at its initial value (a full 32-bit turn)
// and 'rot' is reported as 0.
static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    for (;;) {
        const bool lowPairSet = (val & 3) != 0;
        const bool topSixClear = (val & 0xFC000000) == 0;
        if (lowPairSet && topSixClear) {
            return;
        }

        // rotate right by two: the low pair wraps around to bits 31:30
        const uint32_t wrapped = (val & 3) << 30;
        val = (val >> 2) | wrapped;
        rot += 2;

        if (rot == 32) {
            // went all the way around without finding a normal form
            rot = 0;
            return;
        }
    }
}
883
884void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
885{
886    uint32_t rot;
887    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
888    mask &= size;
889
890    if (mask == size) {
891        if (d != s)
892            MOV( AL, 0, d, s);
893        return;
894    }
895
896    if (getCodegenArch() == CODEGEN_ARCH_MIPS) {
897        // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
898        // the below ' while (mask)' code is buggy on mips
899        // since mips returns true on isValidImmediate()
900        // then we get multiple AND instr (positive logic)
901        AND( AL, 0, d, s, imm(mask) );
902        return;
903    }
904
905    int negative_logic = !isValidImmediate(mask);
906    if (negative_logic) {
907        mask = ~mask & size;
908    }
909    normalize(mask, rot);
910
911    if (mask) {
912        while (mask) {
913            uint32_t bitpos = find_bottom(mask);
914            int shift = rot + bitpos;
915            uint32_t m = mask & (0xff << bitpos);
916            mask &= ~m;
917            m >>= bitpos;
918            int32_t newMask =  (m<<shift) | (m>>(32-shift));
919            if (!negative_logic) {
920                AND( AL, 0, d, s, imm(newMask) );
921            } else {
922                BIC( AL, 0, d, s, imm(newMask) );
923            }
924            s = d;
925        }
926    } else {
927        MOV( AL, 0, d, imm(0));
928    }
929}
930
931void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
932{
933    if (!mMasking || mAllMasked) {
934        return;
935    }
936
937    comment("color mask");
938
939    pixel_t fb(mDstPixel);
940    pixel_t s(pixel);
941    if (!(pixel.flags & CORRUPTIBLE)) {
942        pixel.reg = regs.obtain();
943        pixel.flags |= CORRUPTIBLE;
944    }
945
946    int mask = 0;
947    for (int i=0 ; i<4 ; i++) {
948        const int component_mask = 1<<i;
949        const int h = fb.format.c[i].h;
950        const int l = fb.format.c[i].l;
951        if (h && (!(mMasking & component_mask))) {
952            mask |= ((1<<(h-l))-1) << l;
953        }
954    }
955
956    // There is no need to clear the masked components of the source
957    // (unless we applied a logic op), because they're already zeroed
958    // by construction (masked components are not computed)
959
960    if (mLogicOp) {
961        const needs_t& needs = mBuilderContext.needs;
962        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
963        if (opcode != GGL_CLEAR) {
964            // clear masked component of source
965            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
966            s = pixel;
967        }
968    }
969
970    // clear non masked components of destination
971    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
972
973    // or back the channels that were masked
974    if (s.reg == fb.reg) {
975         // this is in fact a MOV
976        if (s.reg == pixel.reg) {
977            // ugh. this in in fact a nop
978        } else {
979            MOV(AL, 0, pixel.reg, fb.reg);
980        }
981    } else {
982        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
983    }
984}
985
986// ---------------------------------------------------------------------------
987
988void GGLAssembler::base_offset(
989        const pointer_t& d, const pointer_t& b, const reg_t& o)
990{
991    switch (b.size) {
992    case 32:
993        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
994        break;
995    case 24:
996        if (d.reg == b.reg) {
997            ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
998            ADD(AL, 0, d.reg, d.reg, o.reg);
999        } else {
1000            ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
1001            ADD(AL, 0, d.reg, d.reg, b.reg);
1002        }
1003        break;
1004    case 16:
1005        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
1006        break;
1007    case 8:
1008        ADD(AL, 0, d.reg, b.reg, o.reg);
1009        break;
1010    }
1011}
1012
1013// ----------------------------------------------------------------------------
1014// cheezy register allocator...
1015// ----------------------------------------------------------------------------
1016
// Modified to support MIPS processors in a very simple way. We retain the
// (ARM) limit of 16 total registers, but shift the mapping of those registers
// from 0-15 to 2-17 (register 0 on MIPS cannot be used as a general-purpose
// register, and register 1 is traditionally used as a temp).
1021
1022RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
1023{
1024}
1025
1026void RegisterAllocator::reset()
1027{
1028    mRegs.reset();
1029}
1030
1031int RegisterAllocator::reserveReg(int reg)
1032{
1033    return mRegs.reserve(reg);
1034}
1035
1036int RegisterAllocator::obtainReg()
1037{
1038    return mRegs.obtain();
1039}
1040
1041void RegisterAllocator::recycleReg(int reg)
1042{
1043    mRegs.recycle(reg);
1044}
1045
1046RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
1047{
1048    return mRegs;
1049}
1050
1051// ----------------------------------------------------------------------------
1052
1053RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
1054    : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
1055{
1056    if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) {
1057        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
1058    }
1059    reserve(ARMAssemblerInterface::SP);
1060    reserve(ARMAssemblerInterface::PC);
1061}
1062
1063RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
1064    : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
1065{
1066    if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) {
1067        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
1068    }
1069}
1070
1071RegisterAllocator::RegisterFile::~RegisterFile()
1072{
1073}
1074
1075bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
1076{
1077    return (mRegs == rhs.mRegs);
1078}
1079
1080void RegisterAllocator::RegisterFile::reset()
1081{
1082    mRegs = mTouched = mStatus = 0;
1083    reserve(ARMAssemblerInterface::SP);
1084    reserve(ARMAssemblerInterface::PC);
1085}
1086
1087// RegisterFile::reserve() take a register parameter in the
1088// range 0-15 (Arm compatible), but on a Mips processor, will
1089// return the actual allocated register in the range 2-17.
1090int RegisterAllocator::RegisterFile::reserve(int reg)
1091{
1092    reg += mRegisterOffset;
1093    LOG_ALWAYS_FATAL_IF(isUsed(reg),
1094                        "reserving register %d, but already in use",
1095                        reg);
1096    mRegs |= (1<<reg);
1097    mTouched |= mRegs;
1098    return reg;
1099}
1100
1101// This interface uses regMask in range 2-17 on MIPS, no translation.
1102void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
1103{
1104    mRegs |= regMask;
1105    mTouched |= regMask;
1106}
1107
1108int RegisterAllocator::RegisterFile::isUsed(int reg) const
1109{
1110    LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
1111    return mRegs & (1<<reg);
1112}
1113
1114int RegisterAllocator::RegisterFile::obtain()
1115{
1116    const char priorityList[14] = {  0,  1, 2, 3,
1117                                    12, 14, 4, 5,
1118                                     6,  7, 8, 9,
1119                                    10, 11 };
1120    const int nbreg = sizeof(priorityList);
1121    int i, r, reg;
1122    for (i=0 ; i<nbreg ; i++) {
1123        r = priorityList[i];
1124        if (!isUsed(r + mRegisterOffset)) {
1125            break;
1126        }
1127    }
1128    // this is not an error anymore because, we'll try again with
1129    // a lower optimization level.
1130    //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1131    if (i >= nbreg) {
1132        mStatus |= OUT_OF_REGISTERS;
1133        // we return SP so we can more easily debug things
1134        // the code will never be run anyway.
1135        return ARMAssemblerInterface::SP;
1136    }
1137    reg = reserve(r);  // Param in Arm range 0-15, returns range 2-17 on Mips.
1138    return reg;
1139}
1140
1141bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1142{
1143    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
1144    return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
1145}
1146
1147int RegisterAllocator::RegisterFile::countFreeRegs() const
1148{
1149    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
1150    int f = ~regs & 0xFFFF;
1151    // now count number of 1
1152   f = (f & 0x5555) + ((f>>1) & 0x5555);
1153   f = (f & 0x3333) + ((f>>2) & 0x3333);
1154   f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1155   f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1156   return f;
1157}
1158
1159void RegisterAllocator::RegisterFile::recycle(int reg)
1160{
1161    // commented out, since common failure of running out of regs
1162    // triggers this assertion. Since the code is not execectued
1163    // in that case, it does not matter. No reason to FATAL err.
1164    // LOG_FATAL_IF(!isUsed(reg),
1165    //         "recycling unallocated register %d",
1166    //         reg);
1167    mRegs &= ~(1<<reg);
1168}
1169
1170void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
1171{
1172    // commented out, since common failure of running out of regs
1173    // triggers this assertion. Since the code is not execectued
1174    // in that case, it does not matter. No reason to FATAL err.
1175    // LOG_FATAL_IF((mRegs & regMask)!=regMask,
1176    //         "recycling unallocated registers "
1177    //         "(recycle=%08x, allocated=%08x, unallocated=%08x)",
1178    //         regMask, mRegs, mRegs&regMask);
1179    mRegs &= ~regMask;
1180}
1181
1182uint32_t RegisterAllocator::RegisterFile::touched() const
1183{
1184    return mTouched;
1185}
1186
1187// ----------------------------------------------------------------------------
1188
1189}; // namespace android
1190
1191