// blending.cpp revision dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0
/* libs/pixelflinger/codeflinger/blending.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
18#include <assert.h>
19#include <stdint.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <sys/types.h>
23
24#include <cutils/log.h>
25
26#include "codeflinger/GGLAssembler.h"
27
28
29namespace android {
30
31void GGLAssembler::build_fog(
32                        component_t& temp,      // incomming fragment / output
33                        int component,
34                        Scratch& regs)
35{
36   if (mInfo[component].fog) {
37        Scratch scratches(registerFile());
38        comment("fog");
39
40        integer_t fragment(temp.reg, temp.h, temp.flags);
41        if (!(temp.flags & CORRUPTIBLE)) {
42            temp.reg = regs.obtain();
43            temp.flags |= CORRUPTIBLE;
44        }
45
46        integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
47        LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
48                immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
49
50        integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
51        CONTEXT_LOAD(factor.reg, generated_vars.f);
52
53        // clamp fog factor (TODO: see if there is a way to guarantee
54        // we won't overflow, when setting the iterators)
55        BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
56        CMP(AL, factor.reg, imm( 0x10000 ));
57        MOV(HS, 0, factor.reg, imm( 0x10000 ));
58
59        build_blendFOneMinusF(temp, factor, fragment, fogColor);
60    }
61}
62
63void GGLAssembler::build_blending(
64                        component_t& temp,      // incomming fragment / output
65                        const pixel_t& pixel,   // framebuffer
66                        int component,
67                        Scratch& regs)
68{
69   if (!mInfo[component].blend)
70        return;
71
72    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
73    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
74    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
75        fs = GGL_ONE;
76    const int blending = blending_codes(fs, fd);
77    if (!temp.size()) {
78        // here, blending will produce something which doesn't depend on
79        // that component (eg: GL_ZERO:GL_*), so the register has not been
80        // allocated yet. Will never be used as a source.
81        temp = component_t(regs.obtain(), CORRUPTIBLE);
82    }
83
84    // we are doing real blending...
85    // fb:          extracted dst
86    // fragment:    extracted src
87    // temp:        component_t(fragment) and result
88
89    // scoped register allocator
90    Scratch scratches(registerFile());
91    comment("blending");
92
93    // we can optimize these cases a bit...
94    // (1) saturation is not needed
95    // (2) we can use only one multiply instead of 2
96    // (3) we can reduce the register pressure
97    //      R = S*f + D*(1-f) = (S-D)*f + D
98    //      R = S*(1-f) + D*f = (D-S)*f + S
99
100    const bool same_factor_opt1 =
101        (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
102        (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
103        (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
104        (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
105
106    const bool same_factor_opt2 =
107        (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
108        (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
109        (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
110        (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
111
112
113    // XXX: we could also optimize these cases:
114    // R = S*f + D*f = (S+D)*f
115    // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
116    // R = S*D + D*S = 2*S*D
117
118
119    // see if we need to extract 'component' from the destination (fb)
120    integer_t fb;
121    if (blending & (BLEND_DST|FACTOR_DST)) {
122        fb.setTo(scratches.obtain(), 32);
123        extract(fb, pixel, component);
124        if (mDithering) {
125            // XXX: maybe what we should do instead, is simply
126            // expand fb -or- fragment to the larger of the two
127            if (fb.size() < temp.size()) {
128                // for now we expand 'fb' to min(fragment, 8)
129                int new_size = temp.size() < 8 ? temp.size() : 8;
130                expand(fb, fb, new_size);
131            }
132        }
133    }
134
135
136    // convert input fragment to integer_t
137    if (temp.l && (temp.flags & CORRUPTIBLE)) {
138        MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
139        temp.h -= temp.l;
140        temp.l = 0;
141    }
142    integer_t fragment(temp.reg, temp.size(), temp.flags);
143
144    // if not done yet, convert input fragment to integer_t
145    if (temp.l) {
146        // here we know temp is not CORRUPTIBLE
147        fragment.reg = scratches.obtain();
148        MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
149        fragment.flags |= CORRUPTIBLE;
150    }
151
152    if (!(temp.flags & CORRUPTIBLE)) {
153        // temp is not corruptible, but since it's the destination it
154        // will be modified, so we need to allocate a new register.
155        temp.reg = regs.obtain();
156        temp.flags &= ~CORRUPTIBLE;
157        fragment.flags &= ~CORRUPTIBLE;
158    }
159
160    if ((blending & BLEND_SRC) && !same_factor_opt1) {
161        // source (fragment) is needed for the blending stage
162        // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
163        fragment.flags &= ~CORRUPTIBLE;
164    }
165
166
167    if (same_factor_opt1) {
168        //  R = S*f + D*(1-f) = (S-D)*f + D
169        integer_t factor;
170        build_blend_factor(factor, fs,
171                component, pixel, fragment, fb, scratches);
172        // fb is always corruptible from this point
173        fb.flags |= CORRUPTIBLE;
174        build_blendFOneMinusF(temp, factor, fragment, fb);
175    } else if (same_factor_opt2) {
176        //  R = S*(1-f) + D*f = (D-S)*f + S
177        integer_t factor;
178        // fb is always corrruptible here
179        fb.flags |= CORRUPTIBLE;
180        build_blend_factor(factor, fd,
181                component, pixel, fragment, fb, scratches);
182        build_blendOneMinusFF(temp, factor, fragment, fb);
183    } else {
184        integer_t src_factor;
185        integer_t dst_factor;
186
187        // if destination (fb) is not needed for the blending stage,
188        // then it can be marked as CORRUPTIBLE
189        if (!(blending & BLEND_DST)) {
190            fb.flags |= CORRUPTIBLE;
191        }
192
193        // XXX: try to mark some registers as CORRUPTIBLE
194        // in most case we could make those corruptible
195        // when we're processing the last component
196        // but not always, for instance
197        //    when fragment is constant and not reloaded
198        //    when fb is needed for logic-ops or masking
199        //    when a register is aliased (for instance with mAlphaSource)
200
201        // blend away...
202        if (fs==GGL_ZERO) {
203            if (fd==GGL_ZERO) {         // R = 0
204                // already taken care of
205            } else if (fd==GGL_ONE) {   // R = D
206                // already taken care of
207            } else {                    // R = D*fd
208                // compute fd
209                build_blend_factor(dst_factor, fd,
210                        component, pixel, fragment, fb, scratches);
211                mul_factor(temp, fb, dst_factor);
212            }
213        } else if (fs==GGL_ONE) {
214            if (fd==GGL_ZERO) {         // R = S
215                // NOP, taken care of
216            } else if (fd==GGL_ONE) {   // R = S + D
217                component_add(temp, fb, fragment); // args order matters
218                component_sat(temp);
219            } else {                    // R = S + D*fd
220                // compute fd
221                build_blend_factor(dst_factor, fd,
222                        component, pixel, fragment, fb, scratches);
223                mul_factor_add(temp, fb, dst_factor, component_t(fragment));
224                if (fd==GGL_ONE_MINUS_SRC_ALPHA) {
225                    // XXX: in theory this is not correct, we should
226                    // saturate here. However, this mode is often
227                    // used for displaying alpha-premultiplied graphics,
228                    // in which case, saturation is not necessary.
229                    // unfortunatelly, we have no way to know.
230                    // This is a case, where we sacrifice correctness for
231                    // performance. we should probably have some heuristics.
232                } else {
233                    component_sat(temp);
234                }
235            }
236        } else {
237            // compute fs
238            build_blend_factor(src_factor, fs,
239                    component, pixel, fragment, fb, scratches);
240            if (fd==GGL_ZERO) {         // R = S*fs
241                mul_factor(temp, fragment, src_factor);
242            } else if (fd==GGL_ONE) {   // R = S*fs + D
243                mul_factor_add(temp, fragment, src_factor, component_t(fb));
244                component_sat(temp);
245            } else {                    // R = S*fs + D*fd
246                mul_factor(temp, fragment, src_factor);
247                if (scratches.isUsed(src_factor.reg))
248                    scratches.recycle(src_factor.reg);
249                // compute fd
250                build_blend_factor(dst_factor, fd,
251                        component, pixel, fragment, fb, scratches);
252                mul_factor_add(temp, fb, dst_factor, temp);
253                if (!same_factor_opt1 && !same_factor_opt2) {
254                    component_sat(temp);
255                }
256            }
257        }
258    }
259
260    // now we can be corrupted (it's the dest)
261    temp.flags |= CORRUPTIBLE;
262}
263
264void GGLAssembler::build_blend_factor(
265        integer_t& factor, int f, int component,
266        const pixel_t& dst_pixel,
267        integer_t& fragment,
268        integer_t& fb,
269        Scratch& scratches)
270{
271    integer_t src_alpha(fragment);
272
273    // src_factor/dst_factor won't be used after blending,
274    // so it's fine to mark them as CORRUPTIBLE (if not aliased)
275    factor.flags |= CORRUPTIBLE;
276
277    switch(f) {
278    case GGL_ONE_MINUS_SRC_ALPHA:
279    case GGL_SRC_ALPHA:
280        if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
281            // we're processing alpha, so we already have
282            // src-alpha in fragment, and we need src-alpha just this time.
283        } else {
284           // alpha-src will be needed for other components
285            if (!mBlendFactorCached || mBlendFactorCached==f) {
286                src_alpha = mAlphaSource;
287                factor = mAlphaSource;
288                factor.flags &= ~CORRUPTIBLE;
289                // we already computed the blend factor before, nothing to do.
290                if (mBlendFactorCached)
291                    return;
292                // this is the first time, make sure to compute the blend
293                // factor properly.
294                mBlendFactorCached = f;
295                break;
296            } else {
297                // we have a cached alpha blend factor, but we want another one,
298                // this should really not happen because by construction,
299                // we cannot have BOTH source and destination
300                // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
301                // the blending stage uses the f/(1-f) optimization
302
303                // for completeness, we handle this case though. Since there
304                // are only 2 choices, this meens we want "the other one"
305                // (1-factor)
306                factor = mAlphaSource;
307                factor.flags &= ~CORRUPTIBLE;
308                RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
309                mBlendFactorCached = f;
310                return;
311            }
312        }
313        // fall-through...
314    case GGL_ONE_MINUS_DST_COLOR:
315    case GGL_DST_COLOR:
316    case GGL_ONE_MINUS_SRC_COLOR:
317    case GGL_SRC_COLOR:
318    case GGL_ONE_MINUS_DST_ALPHA:
319    case GGL_DST_ALPHA:
320    case GGL_SRC_ALPHA_SATURATE:
321        // help us find out what register we can use for the blend-factor
322        // CORRUPTIBLE registers are chosen first, or a new one is allocated.
323        if (fragment.flags & CORRUPTIBLE) {
324            factor.setTo(fragment.reg, 32, CORRUPTIBLE);
325            fragment.flags &= ~CORRUPTIBLE;
326        } else if (fb.flags & CORRUPTIBLE) {
327            factor.setTo(fb.reg, 32, CORRUPTIBLE);
328            fb.flags &= ~CORRUPTIBLE;
329        } else {
330            factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
331        }
332        break;
333    }
334
335    // XXX: doesn't work if size==1
336
337    switch(f) {
338    case GGL_ONE_MINUS_DST_COLOR:
339    case GGL_DST_COLOR:
340        factor.s = fb.s;
341        ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
342        break;
343    case GGL_ONE_MINUS_SRC_COLOR:
344    case GGL_SRC_COLOR:
345        factor.s = fragment.s;
346        ADD(AL, 0, factor.reg, fragment.reg,
347            reg_imm(fragment.reg, LSR, fragment.s-1));
348        break;
349    case GGL_ONE_MINUS_SRC_ALPHA:
350    case GGL_SRC_ALPHA:
351        factor.s = src_alpha.s;
352        ADD(AL, 0, factor.reg, src_alpha.reg,
353                reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
354        break;
355    case GGL_ONE_MINUS_DST_ALPHA:
356    case GGL_DST_ALPHA:
357        // XXX: should be precomputed
358        extract(factor, dst_pixel, GGLFormat::ALPHA);
359        ADD(AL, 0, factor.reg, factor.reg,
360                reg_imm(factor.reg, LSR, factor.s-1));
361        break;
362    case GGL_SRC_ALPHA_SATURATE:
363        // XXX: should be precomputed
364        // XXX: f = min(As, 1-Ad)
365        // btw, we're guaranteed that Ad's size is <= 8, because
366        // it's extracted from the framebuffer
367        break;
368    }
369
370    switch(f) {
371    case GGL_ONE_MINUS_DST_COLOR:
372    case GGL_ONE_MINUS_SRC_COLOR:
373    case GGL_ONE_MINUS_DST_ALPHA:
374    case GGL_ONE_MINUS_SRC_ALPHA:
375        RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
376    }
377
378    // don't need more than 8-bits for the blend factor
379    // and this will prevent overflows in the multiplies later
380    if (factor.s > 8) {
381        MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
382        factor.s = 8;
383    }
384}
385
386int GGLAssembler::blending_codes(int fs, int fd)
387{
388    int blending = 0;
389    switch(fs) {
390    case GGL_ONE:
391        blending |= BLEND_SRC;
392        break;
393
394    case GGL_ONE_MINUS_DST_COLOR:
395    case GGL_DST_COLOR:
396        blending |= FACTOR_DST|BLEND_SRC;
397        break;
398    case GGL_ONE_MINUS_DST_ALPHA:
399    case GGL_DST_ALPHA:
400        // no need to extract 'component' from the destination
401        // for the blend factor, because we need ALPHA only.
402        blending |= BLEND_SRC;
403        break;
404
405    case GGL_ONE_MINUS_SRC_COLOR:
406    case GGL_SRC_COLOR:
407        blending |= FACTOR_SRC|BLEND_SRC;
408        break;
409    case GGL_ONE_MINUS_SRC_ALPHA:
410    case GGL_SRC_ALPHA:
411    case GGL_SRC_ALPHA_SATURATE:
412        blending |= FACTOR_SRC|BLEND_SRC;
413        break;
414    }
415    switch(fd) {
416    case GGL_ONE:
417        blending |= BLEND_DST;
418        break;
419
420    case GGL_ONE_MINUS_DST_COLOR:
421    case GGL_DST_COLOR:
422        blending |= FACTOR_DST|BLEND_DST;
423        break;
424    case GGL_ONE_MINUS_DST_ALPHA:
425    case GGL_DST_ALPHA:
426        blending |= FACTOR_DST|BLEND_DST;
427        break;
428
429    case GGL_ONE_MINUS_SRC_COLOR:
430    case GGL_SRC_COLOR:
431        blending |= FACTOR_SRC|BLEND_DST;
432        break;
433    case GGL_ONE_MINUS_SRC_ALPHA:
434    case GGL_SRC_ALPHA:
435        // no need to extract 'component' from the source
436        // for the blend factor, because we need ALPHA only.
437        blending |= BLEND_DST;
438        break;
439    }
440    return blending;
441}
442
443// ---------------------------------------------------------------------------
444
445void GGLAssembler::build_blendFOneMinusF(
446        component_t& temp,
447        const integer_t& factor,
448        const integer_t& fragment,
449        const integer_t& fb)
450{
451    //  R = S*f + D*(1-f) = (S-D)*f + D
452    Scratch scratches(registerFile());
453    // compute S-D
454    integer_t diff(fragment.flags & CORRUPTIBLE ?
455            fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
456    const int shift = fragment.size() - fb.size();
457    if (shift>0)        RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
458    else if (shift<0)   RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
459    else                RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
460    mul_factor_add(temp, diff, factor, component_t(fb));
461}
462
463void GGLAssembler::build_blendOneMinusFF(
464        component_t& temp,
465        const integer_t& factor,
466        const integer_t& fragment,
467        const integer_t& fb)
468{
469    //  R = S*f + D*(1-f) = (S-D)*f + D
470    Scratch scratches(registerFile());
471    // compute D-S
472    integer_t diff(fb.flags & CORRUPTIBLE ?
473            fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
474    const int shift = fragment.size() - fb.size();
475    if (shift>0)        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
476    else if (shift<0)   SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
477    else                SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
478    mul_factor_add(temp, diff, factor, component_t(fragment));
479}
480
481// ---------------------------------------------------------------------------
482
483void GGLAssembler::mul_factor(  component_t& d,
484                                const integer_t& v,
485                                const integer_t& f)
486{
487    int vs = v.size();
488    int fs = f.size();
489    int ms = vs+fs;
490
491    // XXX: we could have special cases for 1 bit mul
492
493    // all this code below to use the best multiply instruction
494    // wrt the parameters size. We take advantage of the fact
495    // that the 16-bits multiplies allow a 16-bit shift
496    // The trick is that we just make sure that we have at least 8-bits
497    // per component (which is enough for a 8 bits display).
498
499    int xy;
500    int vshift = 0;
501    int fshift = 0;
502    int smulw = 0;
503
504    if (vs<16) {
505        if (fs<16) {
506            xy = xyBB;
507        } else if (GGL_BETWEEN(fs, 24, 31)) {
508            ms -= 16;
509            xy = xyTB;
510        } else {
511            // eg: 15 * 18  ->  15 * 15
512            fshift = fs - 15;
513            ms -= fshift;
514            xy = xyBB;
515        }
516    } else if (GGL_BETWEEN(vs, 24, 31)) {
517        if (fs<16) {
518            ms -= 16;
519            xy = xyTB;
520        } else if (GGL_BETWEEN(fs, 24, 31)) {
521            ms -= 32;
522            xy = xyTT;
523        } else {
524            // eg: 24 * 18  ->  8 * 18
525            fshift = fs - 15;
526            ms -= 16 + fshift;
527            xy = xyTB;
528        }
529    } else {
530        if (fs<16) {
531            // eg: 18 * 15  ->  15 * 15
532            vshift = vs - 15;
533            ms -= vshift;
534            xy = xyBB;
535        } else if (GGL_BETWEEN(fs, 24, 31)) {
536            // eg: 18 * 24  ->  15 * 8
537            vshift = vs - 15;
538            ms -= 16 + vshift;
539            xy = xyBT;
540        } else {
541            // eg: 18 * 18  ->  (15 * 18)>>16
542            fshift = fs - 15;
543            ms -= 16 + fshift;
544            xy = yB;    //XXX SMULWB
545            smulw = 1;
546        }
547    }
548
549    LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
550
551    int vreg = v.reg;
552    int freg = f.reg;
553    if (vshift) {
554        MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
555        vreg = d.reg;
556    }
557    if (fshift) {
558        MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
559        freg = d.reg;
560    }
561    if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
562    else        SMUL(AL, xy, d.reg, vreg, freg);
563
564
565    d.h = ms;
566    if (mDithering) {
567        d.l = 0;
568    } else {
569        d.l = fs;
570        d.flags |= CLEAR_LO;
571    }
572}
573
574void GGLAssembler::mul_factor_add(  component_t& d,
575                                    const integer_t& v,
576                                    const integer_t& f,
577                                    const component_t& a)
578{
579    // XXX: we could have special cases for 1 bit mul
580    Scratch scratches(registerFile());
581
582    int vs = v.size();
583    int fs = f.size();
584    int as = a.h;
585    int ms = vs+fs;
586
587    LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
588
589    integer_t add(a.reg, a.h, a.flags);
590
591    // 'a' is a component_t but it is guaranteed to have
592    // its high bits set to 0. However in the dithering case,
593    // we can't get away with truncating the potentially bad bits
594    // so extraction is needed.
595
596   if ((mDithering) && (a.size() < ms)) {
597        // we need to expand a
598        if (!(a.flags & CORRUPTIBLE)) {
599            // ... but it's not corruptible, so we need to pick a
600            // temporary register.
601            // Try to uses the destination register first (it's likely
602            // to be usable, unless it aliases an input).
603            if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
604                add.reg = d.reg;
605            } else {
606                add.reg = scratches.obtain();
607            }
608        }
609        expand(add, a, ms); // extracts and expands
610        as = ms;
611    }
612
613    if (ms == as) {
614        if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
615        else                MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
616    } else {
617        int temp = d.reg;
618        if (temp == add.reg) {
619            // the mul will modify add.reg, we need an intermediary reg
620            if (v.flags & CORRUPTIBLE)      temp = v.reg;
621            else if (f.flags & CORRUPTIBLE) temp = f.reg;
622            else                            temp = scratches.obtain();
623        }
624
625        if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
626        else                MUL(AL, 0, temp, v.reg, f.reg);
627
628        if (ms>as) {
629            ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
630        } else if (ms<as) {
631            // not sure if we should expand the mul instead?
632            ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
633        }
634    }
635
636    d.h = ms;
637    if (mDithering) {
638        d.l = a.l;
639    } else {
640        d.l = fs>a.l ? fs : a.l;
641        d.flags |= CLEAR_LO;
642    }
643}
644
645void GGLAssembler::component_add(component_t& d,
646        const integer_t& dst, const integer_t& src)
647{
648    // here we're guaranteed that fragment.size() >= fb.size()
649    const int shift = src.size() - dst.size();
650    if (!shift) {
651        ADD(AL, 0, d.reg, src.reg, dst.reg);
652    } else {
653        ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
654    }
655
656    d.h = src.size();
657    if (mDithering) {
658        d.l = 0;
659    } else {
660        d.l = shift;
661        d.flags |= CLEAR_LO;
662    }
663}
664
665void GGLAssembler::component_sat(const component_t& v)
666{
667    const int one = ((1<<v.size())-1)<<v.l;
668    CMP(AL, v.reg, imm( 1<<v.h ));
669    if (isValidImmediate(one)) {
670        MOV(HS, 0, v.reg, imm( one ));
671    } else if (isValidImmediate(~one)) {
672        MVN(HS, 0, v.reg, imm( ~one ));
673    } else {
674        MOV(HS, 0, v.reg, imm( 1<<v.h ));
675        SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
676    }
677}
678
679// ----------------------------------------------------------------------------
680
681}; // namespace android
682
683