GGLAssembler.cpp revision 4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53
1/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#define LOG_TAG "GGLAssembler"
19
20#include <assert.h>
21#include <stdint.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <sys/types.h>
25#include <cutils/log.h>
26
27#include "codeflinger/GGLAssembler.h"
28
29namespace android {
30
31// ----------------------------------------------------------------------------
32
// Construct an assembler that emits through 'target'.
// Code generation starts at the most aggressive optimization level (7);
// scanline() lowers the level on failure and retries.
GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
{
}
37
// Nothing to release: the proxied target is not owned by this object.
GGLAssembler::~GGLAssembler()
{
}
41
// Emit the function prolog (delegates to the underlying assembler).
void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}
46
// Emit the function epilog, restoring the registers in 'touched'
// (delegates to the underlying assembler).
void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}
51
// Reset both the code buffer and the register allocator, and record
// the optimization level to use for the next generation attempt.
void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}
58
59// ---------------------------------------------------------------------------
60
61int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
62{
63    int err = 0;
64    int opt_level = mOptLevel;
65    while (opt_level >= 0) {
66        reset(opt_level);
67        err = scanline_core(needs, c);
68        if (err == 0)
69            break;
70        opt_level--;
71    }
72
73    // XXX: in theory, pcForLabel is not valid before generate()
74    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
75    uint32_t* fragment_end_pc = pcForLabel("epilog");
76    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
77
78    // build a name for our pipeline
79    char name[64];
80    sprintf(name,
81            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
82            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
83
84    if (err) {
85        LOGE("Error while generating ""%s""\n", name);
86        disassemble(name);
87        return -1;
88    }
89
90    return generate(name);
91}
92
// Generate the complete scanline pipeline for 'needs':
// decode the needs bits into member state, compute per-component
// requirements, then emit prolog -> per-scanline setup ->
// "fragment_loop" -> "epilog", plus the discard tails taken when the
// depth or alpha test rejects a fragment.
// Returns the register-allocator status (0 on success); a non-zero
// status makes scanline() retry at a lower optimization level.
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    int64_t duration = ggl_system_time();   // appears unused below (left-over instrumentation)

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    // decode the raster-state bits out of the needs vector
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    // If the color buffer has no alpha channel, destination alpha reads
    // as 1.0, so DST_ALPHA-based factors collapse to GGL_ONE.
    // NOTE(review): GGL_ONE_MINUS_DST_ALPHA would collapse to GGL_ZERO,
    // not GGL_ONE — confirm whether mapping it to GGL_ONE is intended.
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending =    blending_codes(mBlendSrc, mBlendDst) |
                            blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    // Fill in mInfo[i] for each of A,R,G,B: whether the component is
    // masked, lands in the destination, needs blending, is iterated, etc.
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        // SRC_ALPHA_SATURATE applied to the alpha channel is just ONE
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked =   !!(masking & mask);
        info.inDest =   !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        // luminance formats have no independent green/blue channels
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        // alpha is needed even when not stored if blending or the
        // alpha test consumes it
        info.needed =   (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth =   mSmooth && info.iterated;
        info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
    }


    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    // bail out early if per-scanline setup exhausted the register file
    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) || mMasking ||
                (mLogicOp & LOGIC_OP_DST)) {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // fetch the dither value for this fragment from the
                // context's dither matrix, indexed by the low bits of count
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            // alpha first, so the alpha test can reject before the
            // color components are computed
            build_component(pixel, parts, GGLFormat::ALPHA,    regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED,      regs);
            build_component(pixel, parts, GGLFormat::GREEN,    regs);
            build_component(pixel, parts, GGLFormat::BLUE,     regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        // logic operation
        build_logic_op(pixel, regs);

        // masking
        build_masking(pixel, regs);

        comment("store");
        store(parts.cbPtr, pixel, WRITE_BACK);
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    // (reload==3 means both c and dx are reloaded/stored from the context
    // inside build_component, so there is nothing to iterate here)
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    // loop counter lives in the top 16 bits of count.reg; decrement it
    // and loop while positive
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    // discard tails: fragments rejected by the depth or alpha test jump
    // here; iterators still have to be stepped before looping back
    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        // advance the color-buffer pointer past the skipped pixel
        ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}
333
334// ---------------------------------------------------------------------------
335
// Emit the per-scanline setup code: compute the pixel count, the
// color-buffer pointer, and the initial values of the fog, Z and
// texture/color iterators. Fills in 'parts' with the registers that
// hold these values for the fragment loop.
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    // the loop counter is count-1 (the loop branches while PL)
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        // dither offset = (x & mask) + ((y & mask) << order)
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    // compute dst ptr
    comment("compute color-buffer pointer");
    const int cb_bits = mCbFormat.size*8;
    int Rs = scratches.obtain();
    parts.cbPtr.setTo(obtainReg(), cb_bits);
    CONTEXT_LOAD(Rs, state.buffers.color.stride);
    CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
    SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
    base_offset(parts.cbPtr, parts.cbPtr, Rs);
    scratches.recycle(Rs);

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        // f = ydfdy + x*dfdx; stored back so the loop can iterate it
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx,  generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase using parts.count
        // zbase = base + (xl-count + stride*y)*2
        // (build_depth_test later subtracts (count>>16)<<1 to get the
        // current pixel's depth address as count decrements)
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        // coverage values are 16-bit, hence the *2 (LSL 1) on x
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}
435
436// ---------------------------------------------------------------------------
437
438void GGLAssembler::build_component( pixel_t& pixel,
439                                    const fragment_parts_t& parts,
440                                    int component,
441                                    Scratch& regs)
442{
443    static char const * comments[] = {"alpha", "red", "green", "blue"};
444    comment(comments[component]);
445
446    // local register file
447    Scratch scratches(registerFile());
448    const int dst_component_size = pixel.component_size(component);
449
450    component_t temp(-1);
451    build_incoming_component( temp, dst_component_size,
452            parts, component, scratches, regs);
453
454    if (mInfo[component].inDest) {
455
456        // blending...
457        build_blending( temp, mDstPixel, component, scratches );
458
459        // downshift component and rebuild pixel...
460        downshift(pixel, component, temp, parts.dither);
461    }
462}
463
// Compute the incoming (source) value of one component into 'temp'.
// Depending on the needs this is the iterated color, a texel, or a
// combination via the texture environment; the component may also have
// to be extracted to full precision for blending, fog, dithering, etc.
// When no extraction is needed, 'temp' simply aliases the right part
// of the iterated color or of a texel.
void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    // NOTE(review): the loop bound is GGL_TEXTURE_UNIT_COUNT-1, which
    // skips the last TMU -- looks suspicious; confirm it's intended.
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                        (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        // the alpha source may be consumed later by the blending stage,
        // so it must live in a register that survives this call
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                // shift the fractional bits out; fragment becomes
                // an integer value with l == 0
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    // reuse the fragment register directly as the
                    // alpha source (and pin it)
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            // if a TMU supplies (and wasn't told to replace) this
            // component, the last such TMU wins
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
599
600bool GGLAssembler::isAlphaSourceNeeded() const
601{
602    // XXX: also needed for alpha-test
603    const int bs = mBlendSrc;
604    const int bd = mBlendDst;
605    return  bs==GGL_SRC_ALPHA_SATURATE ||
606            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
607            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
608}
609
610// ---------------------------------------------------------------------------
611
// Step the iterated (smooth-shaded) color components by their dx
// increments, once per fragment. Components whose value and/or slope
// could not be kept in registers (parts.reload bits) are loaded from
// the context, stepped, and stored back.
void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (mSmooth && !parts.iterated_packed) {
        // update the iterated color in a pipelined way...
        comment("update iterated color");
        Scratch scratches(registerFile());

        const int reload = parts.reload;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            int c = parts.argb[i].reg;
            int dx = parts.argb_dx[i].reg;

            // reload bit 0: color value lives in the context
            if (reload & 1) {
                c = scratches.obtain();
                CONTEXT_LOAD(c, generated_vars.argb[i].c);
            }
            // reload bit 1: slope lives in the context
            if (reload & 2) {
                dx = scratches.obtain();
                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
            }

            // redundant with the enclosing mSmooth check, kept as-is
            if (mSmooth) {
                ADD(AL, 0, c, c, dx);
            }

            if (reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
                scratches.recycle(c);
            }
            if (reload & 2) {
                scratches.recycle(dx);
            }
        }
    }
}
650
651// ---------------------------------------------------------------------------
652
// Multiply the fragment's alpha by the anti-aliasing coverage factor
// read (and post-incremented) from the coverage buffer.
void GGLAssembler::build_coverage_application(component_t& fragment,
        const fragment_parts_t& parts, Scratch& regs)
{
    // here fragment.l is guaranteed to be 0
    if (mAA) {
        // coverages are 1.15 fixed-point numbers
        comment("coverage application");

        component_t incoming(fragment);
        modify(fragment, regs);

        Scratch scratches(registerFile());
        int cf = scratches.obtain();
        // load coverage, then advance the pointer by 2 bytes
        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
        // SMULWB computes (a * b) >> 16 with b being 1.15 fixed-point,
        // which effectively divides the product by 2; compensate by
        // either dropping one bit of precision or pre-shifting left
        if (fragment.h > 31) {
            fragment.h--;
            SMULWB(AL, fragment.reg, incoming.reg, cf);
        } else {
            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
            SMULWB(AL, fragment.reg, fragment.reg, cf);
        }
    }
}
676
677// ---------------------------------------------------------------------------
678
// Emit the alpha test: compare the fragment's alpha against the
// reference value from the context and branch to the discard path
// when the test fails.
void GGLAssembler::build_alpha_test(component_t& fragment,
                                    const fragment_parts_t& parts)
{
    if (mAlphaTest != GGL_ALWAYS) {
        comment("Alpha Test");
        Scratch scratches(registerFile());
        int ref = scratches.obtain();
        // scale the reference down to the fragment's precision
        const int shift = GGL_COLOR_BITS-fragment.size();
        CONTEXT_LOAD(ref, state.alpha_test.ref);
        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
        else       CMP(AL, fragment.reg, ref);
        int cc = NV;
        // condition under which the test PASSES (unsigned compares)
        switch (mAlphaTest) {
        case GGL_NEVER:     cc = NV;    break;
        case GGL_LESS:      cc = LT;    break;
        case GGL_EQUAL:     cc = EQ;    break;
        case GGL_LEQUAL:    cc = LS;    break;
        case GGL_GREATER:   cc = HI;    break;
        case GGL_NOTEQUAL:  cc = NE;    break;
        case GGL_GEQUAL:    cc = HS;    break;
        }
        // cc^1 is the inverse ARM condition code (EQ/NE, HI/LS, ...):
        // branch to discard when the test fails
        B(cc^1, "discard_after_textures");
    }
}
703
704// ---------------------------------------------------------------------------
705
// Emit the depth test and/or depth write, as selected by 'mask'
// (Z_TEST, Z_WRITE). A failing test branches to the
// "discard_before_textures" path. The depth write is predicated on
// the test's condition code when both are requested.
void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        // ic = condition under which the test PASSES; note the compare
        // below is (stored_depth CMP fragment_z), hence the swapped
        // senses (e.g. GGL_LESS -> HI)
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // inverse the condition
        cc = ic^1;

        // depth writes are disabled by the z-mask
        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // zbase -= ((count >> 16) << 1): zbase was precomputed past
            // the end of the span, and count decrements each fragment,
            // so this yields the current pixel's depth address
            // (count's bit 15 is guaranteed 0, so LSR 15 is exact)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            // store the new depth, predicated on the test passing
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}
770
771void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
772{
773    const needs_t& needs = mBuilderContext.needs;
774    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
775        Scratch scratches(registerFile());
776        int dzdx = scratches.obtain();
777        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
778        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
779    }
780}
781
782void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
783{
784    const needs_t& needs = mBuilderContext.needs;
785    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
786        Scratch scratches(registerFile());
787        int dfdx = scratches.obtain();
788        int f = scratches.obtain();
789        CONTEXT_LOAD(f,     generated_vars.f);
790        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);   // stall
791        ADD(AL, 0, f, f, dfdx);
792        CONTEXT_STORE(f,    generated_vars.f);
793    }
794}
795
796// ---------------------------------------------------------------------------
797
// Emit the GL logic operation combining the source pixel with the
// destination pixel (mDstPixel). GGL_COPY is a no-op and emits nothing.
void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    // the needs field stores the opcode relative to GGL_CLEAR; OR-ing
    // GGL_CLEAR back rebases it onto the GGL_* logic-op enum
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    // the result must not clobber a non-corruptible source; allocate
    // a fresh destination register in that case
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    // ops with no single-instruction ARM equivalent are built from
    // an AND/ORR/EOR/BIC followed by MVN (De Morgan forms)
    pixel_t d(mDstPixel);
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_COPY:                                                  break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    };
}
840
841// ---------------------------------------------------------------------------
842
// Returns the (even) bit index of the lowest 2-bit group of `val`
// that contains a set bit. Precondition: `val` must be non-zero,
// otherwise the scan would never terminate.
static uint32_t find_bottom(uint32_t val)
{
    uint32_t pos = 0;
    while ((val & (3 << pos)) == 0)
        pos += 2;
    return pos;
}
850
// Rotates `val` right, two bits at a time, until its low 2 bits are
// non-zero and its top 6 bits are clear -- the shape needed to carve
// ARM immediates (an 8-bit value plus an even rotation) out of it.
// `rot` receives the total right-rotation applied; if a full turn is
// reached (val == 0, or no suitable position exists) `rot` is left
// at 0 with `val` back to its original value.
static void normalize(uint32_t& val, uint32_t& rot)
{
    for (rot = 0; !(val & 3) || (val & 0xFC000000); ) {
        // rotate right by 2: the two dropped bits wrap to the top
        val = (val >> 2) | ((val & 3) << 30);
        rot += 2;
        if (rot == 32) {
            // made a full turn without finding a valid position
            rot = 0;
            break;
        }
    }
}
866
867void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
868{
869    uint32_t rot;
870    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
871    mask &= size;
872
873    if (mask == size) {
874        if (d != s)
875            MOV( AL, 0, d, s);
876        return;
877    }
878
879    int negative_logic = !isValidImmediate(mask);
880    if (negative_logic) {
881        mask = ~mask & size;
882    }
883    normalize(mask, rot);
884
885    if (mask) {
886        while (mask) {
887            uint32_t bitpos = find_bottom(mask);
888            int shift = rot + bitpos;
889            uint32_t m = mask & (0xff << bitpos);
890            mask &= ~m;
891            m >>= bitpos;
892            int32_t newMask =  (m<<shift) | (m>>(32-shift));
893            if (!negative_logic) {
894                AND( AL, 0, d, s, imm(newMask) );
895            } else {
896                BIC( AL, 0, d, s, imm(newMask) );
897            }
898            s = d;
899        }
900    } else {
901        MOV( AL, 0, d, imm(0));
902    }
903}
904
void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    // Applies the color mask: components whose bit is set in mMasking
    // keep the destination (framebuffer) value, the others take the
    // source fragment. The merged pixel is left in `pixel`.
    if (!mMasking)
        return;

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        // the incoming pixel register can't be overwritten: allocate a
        // scratch register to hold the merged result
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    // build the bitmask covering every component NOT masked out
    // (h==0 means the component doesn't exist in this format)
    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked component of source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non masked components of destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // or back the channels that were masked
    if (s.reg == fb.reg) {
         // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}
958
959// ---------------------------------------------------------------------------
960
// Emits code computing d = b + o * (b.size / 8), i.e. the address of
// pixel `o` in a buffer whose pixels are b.size bits wide. Uses only
// shift-and-add forms; the 24-bit (3 bytes/pixel) case computes o*3
// as (o<<1)+o, with an operand order chosen to stay correct even
// when d aliases b.
void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
    switch (b.size) {
    case 32:
        // 4 bytes per pixel: d = b + (o << 2)
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
        break;
    case 24:
        if (d.reg == b.reg) {
            // d aliases b: accumulate  d = b + 2*o, then d += o
            ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, o.reg);
        } else {
            // d = 3*o first, then add the base
            ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, b.reg);
        }
        break;
    case 16:
        // 2 bytes per pixel: d = b + (o << 1)
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
        break;
    case 8:
        // 1 byte per pixel: d = b + o
        ADD(AL, 0, d.reg, b.reg, o.reg);
        break;
    }
}
985
986// ----------------------------------------------------------------------------
987// cheezy register allocator...
988// ----------------------------------------------------------------------------
989
// Resets the underlying register file to its initial state.
void RegisterAllocator::reset()
{
    mRegs.reset();
}
994
// Marks a specific register as allocated; returns that register.
int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}
999
// Allocates and returns the next available register.
int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}
1004
// Returns a previously allocated register to the free pool.
void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}
1009
// Direct access to the underlying register file.
RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}
1014
1015// ----------------------------------------------------------------------------
1016
RegisterAllocator::RegisterFile::RegisterFile()
    : mRegs(0), mTouched(0), mStatus(0)
{
    // SP and PC must never be handed out by obtain()
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}
1023
1024RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
1025    : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
1026{
1027}
1028
RegisterAllocator::RegisterFile::~RegisterFile()
{
    // nothing to release: all state is held by value
}
1032
// Two register files compare equal when the same set of registers is
// currently allocated; mTouched and mStatus are not compared.
bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}
1037
void RegisterAllocator::RegisterFile::reset()
{
    // free everything, then re-reserve SP and PC which must never be
    // handed out by obtain()
    mRegs = mTouched = mStatus = 0;
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}
1044
// Marks `reg` as allocated and returns it; aborts if it is already
// in use.
int RegisterAllocator::RegisterFile::reserve(int reg)
{
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    // note: this ORs the whole in-use set into mTouched, a superset of
    // just `reg` -- harmless, since every used register was already
    // marked touched when it was first reserved
    mTouched |= mRegs;
    return reg;
}
1054
// Bulk-reserves every register in `regMask`. Unlike reserve(), this
// does not check whether any of them is already in use.
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}
1060
// Returns non-zero if `reg` is currently allocated. Only r0-r15 are
// valid indices; anything else aborts.
int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
    return mRegs & (1<<reg);
}
1066
1067int RegisterAllocator::RegisterFile::obtain()
1068{
1069    const char priorityList[14] = {  0,  1, 2, 3,
1070                                    12, 14, 4, 5,
1071                                     6,  7, 8, 9,
1072                                    10, 11 };
1073    const int nbreg = sizeof(priorityList);
1074    int i, r;
1075    for (i=0 ; i<nbreg ; i++) {
1076        r = priorityList[i];
1077        if (!isUsed(r)) {
1078            break;
1079        }
1080    }
1081    // this is not an error anymore because, we'll try again with
1082    // a lower optimization level.
1083    //LOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1084    if (i >= nbreg) {
1085        mStatus |= OUT_OF_REGISTERS;
1086        // we return SP so we can more easily debug things
1087        // the code will never be run anyway.
1088        return ARMAssemblerInterface::SP;
1089    }
1090    reserve(r);
1091    return r;
1092}
1093
1094bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1095{
1096    return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
1097}
1098
1099int RegisterAllocator::RegisterFile::countFreeRegs() const
1100{
1101    int f = ~mRegs & 0xFFFF;
1102    // now count number of 1
1103   f = (f & 0x5555) + ((f>>1) & 0x5555);
1104   f = (f & 0x3333) + ((f>>2) & 0x3333);
1105   f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1106   f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1107   return f;
1108}
1109
// Returns `reg` to the free pool; it must currently be allocated.
// (mTouched intentionally keeps recording it as having been used.)
void RegisterAllocator::RegisterFile::recycle(int reg)
{
    LOG_FATAL_IF(!isUsed(reg),
            "recycling unallocated register %d",
            reg);
    mRegs &= ~(1<<reg);
}
1117
// Bulk version of recycle(): frees every register in `regMask`.
// All of them must currently be allocated.
void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    LOG_FATAL_IF((mRegs & regMask)!=regMask,
            "recycling unallocated registers "
            "(recycle=%08x, allocated=%08x, unallocated=%08x)",
            regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}
1126
// Bitmask of every register that has been reserved at any point since
// the last reset().
uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}
1131
1132// ----------------------------------------------------------------------------
1133
1134}; // namespace android
1135
1136