blending.cpp revision 4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53
1/* libs/pixelflinger/codeflinger/blending.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <assert.h>
19#include <stdint.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <sys/types.h>
23
24#include <cutils/log.h>
25
26#include "codeflinger/GGLAssembler.h"
27
28
29namespace android {
30
31void GGLAssembler::build_fog(
32                        component_t& temp,      // incomming fragment / output
33                        int component,
34                        Scratch& regs)
35{
36   if (mInfo[component].fog) {
37        Scratch scratches(registerFile());
38        comment("fog");
39
40        integer_t fragment(temp.reg, temp.h, temp.flags);
41        if (!(temp.flags & CORRUPTIBLE)) {
42            temp.reg = regs.obtain();
43            temp.flags |= CORRUPTIBLE;
44        }
45
46        integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
47        LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
48                immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
49
50        integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
51        CONTEXT_LOAD(factor.reg, generated_vars.f);
52
53        build_blendFOneMinusF(temp, factor, fragment, fogColor);
54    }
55}
56
57void GGLAssembler::build_blending(
58                        component_t& temp,      // incomming fragment / output
59                        const pixel_t& pixel,   // framebuffer
60                        int component,
61                        Scratch& regs)
62{
63   if (!mInfo[component].blend)
64        return;
65
66    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
67    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
68    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
69        fs = GGL_ONE;
70    const int blending = blending_codes(fs, fd);
71    if (!temp.size()) {
72        // here, blending will produce something which doesn't depend on
73        // that component (eg: GL_ZERO:GL_*), so the register has not been
74        // allocated yet. Will never be used as a source.
75        temp = component_t(regs.obtain(), CORRUPTIBLE);
76    }
77
78    // we are doing real blending...
79    // fb:          extracted dst
80    // fragment:    extracted src
81    // temp:        component_t(fragment) and result
82
83    // scoped register allocator
84    Scratch scratches(registerFile());
85    comment("blending");
86
87    // we can optimize these cases a bit...
88    // (1) saturation is not needed
89    // (2) we can use only one multiply instead of 2
90    // (3) we can reduce the register pressure
91    //      R = S*f + D*(1-f) = (S-D)*f + D
92    //      R = S*(1-f) + D*f = (D-S)*f + S
93
94    const bool same_factor_opt1 =
95        (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
96        (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
97        (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
98        (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
99
100    const bool same_factor_opt2 =
101        (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
102        (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
103        (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
104        (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
105
106
107    // XXX: we could also optimize these cases:
108    // R = S*f + D*f = (S+D)*f
109    // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
110    // R = S*D + D*S = 2*S*D
111
112
113    // see if we need to extract 'component' from the destination (fb)
114    integer_t fb;
115    if (blending & (BLEND_DST|FACTOR_DST)) {
116        fb.setTo(scratches.obtain(), 32);
117        extract(fb, pixel, component);
118        if (mDithering) {
119            // XXX: maybe what we should do instead, is simply
120            // expand fb -or- fragment to the larger of the two
121            if (fb.size() < temp.size()) {
122                // for now we expand 'fb' to min(fragment, 8)
123                int new_size = temp.size() < 8 ? temp.size() : 8;
124                expand(fb, fb, new_size);
125            }
126        }
127    }
128
129
130    // convert input fragment to integer_t
131    if (temp.l && (temp.flags & CORRUPTIBLE)) {
132        MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
133        temp.h -= temp.l;
134        temp.l = 0;
135    }
136    integer_t fragment(temp.reg, temp.size(), temp.flags);
137
138    // if not done yet, convert input fragment to integer_t
139    if (temp.l) {
140        // here we know temp is not CORRUPTIBLE
141        fragment.reg = scratches.obtain();
142        MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
143        fragment.flags |= CORRUPTIBLE;
144    }
145
146    if (!(temp.flags & CORRUPTIBLE)) {
147        // temp is not corruptible, but since it's the destination it
148        // will be modified, so we need to allocate a new register.
149        temp.reg = regs.obtain();
150        temp.flags &= ~CORRUPTIBLE;
151        fragment.flags &= ~CORRUPTIBLE;
152    }
153
154    if ((blending & BLEND_SRC) && !same_factor_opt1) {
155        // source (fragment) is needed for the blending stage
156        // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
157        fragment.flags &= ~CORRUPTIBLE;
158    }
159
160
161    if (same_factor_opt1) {
162        //  R = S*f + D*(1-f) = (S-D)*f + D
163        integer_t factor;
164        build_blend_factor(factor, fs,
165                component, pixel, fragment, fb, scratches);
166        // fb is always corruptible from this point
167        fb.flags |= CORRUPTIBLE;
168        build_blendFOneMinusF(temp, factor, fragment, fb);
169    } else if (same_factor_opt2) {
170        //  R = S*(1-f) + D*f = (D-S)*f + S
171        integer_t factor;
172        // fb is always corrruptible here
173        fb.flags |= CORRUPTIBLE;
174        build_blend_factor(factor, fd,
175                component, pixel, fragment, fb, scratches);
176        build_blendOneMinusFF(temp, factor, fragment, fb);
177    } else {
178        integer_t src_factor;
179        integer_t dst_factor;
180
181        // if destination (fb) is not needed for the blending stage,
182        // then it can be marked as CORRUPTIBLE
183        if (!(blending & BLEND_DST)) {
184            fb.flags |= CORRUPTIBLE;
185        }
186
187        // XXX: try to mark some registers as CORRUPTIBLE
188        // in most case we could make those corruptible
189        // when we're processing the last component
190        // but not always, for instance
191        //    when fragment is constant and not reloaded
192        //    when fb is needed for logic-ops or masking
193        //    when a register is aliased (for instance with mAlphaSource)
194
195        // blend away...
196        if (fs==GGL_ZERO) {
197            if (fd==GGL_ZERO) {         // R = 0
198                // already taken care of
199            } else if (fd==GGL_ONE) {   // R = D
200                // already taken care of
201            } else {                    // R = D*fd
202                // compute fd
203                build_blend_factor(dst_factor, fd,
204                        component, pixel, fragment, fb, scratches);
205                mul_factor(temp, fb, dst_factor);
206            }
207        } else if (fs==GGL_ONE) {
208            if (fd==GGL_ZERO) {         // R = S
209                // NOP, taken care of
210            } else if (fd==GGL_ONE) {   // R = S + D
211                component_add(temp, fb, fragment); // args order matters
212                component_sat(temp);
213            } else {                    // R = S + D*fd
214                // compute fd
215                build_blend_factor(dst_factor, fd,
216                        component, pixel, fragment, fb, scratches);
217                mul_factor_add(temp, fb, dst_factor, component_t(fragment));
218                if (fd==GGL_ONE_MINUS_SRC_ALPHA) {
219                    // XXX: in theory this is not correct, we should
220                    // saturate here. However, this mode is often
221                    // used for displaying alpha-premultiplied graphics,
222                    // in which case, saturation is not necessary.
223                    // unfortunatelly, we have no way to know.
224                    // This is a case, where we sacrifice correctness for
225                    // performance. we should probably have some heuristics.
226                } else {
227                    component_sat(temp);
228                }
229            }
230        } else {
231            // compute fs
232            build_blend_factor(src_factor, fs,
233                    component, pixel, fragment, fb, scratches);
234            if (fd==GGL_ZERO) {         // R = S*fs
235                mul_factor(temp, fragment, src_factor);
236            } else if (fd==GGL_ONE) {   // R = S*fs + D
237                mul_factor_add(temp, fragment, src_factor, component_t(fb));
238                component_sat(temp);
239            } else {                    // R = S*fs + D*fd
240                mul_factor(temp, fragment, src_factor);
241                if (scratches.isUsed(src_factor.reg))
242                    scratches.recycle(src_factor.reg);
243                // compute fd
244                build_blend_factor(dst_factor, fd,
245                        component, pixel, fragment, fb, scratches);
246                mul_factor_add(temp, fb, dst_factor, temp);
247                if (!same_factor_opt1 && !same_factor_opt2) {
248                    component_sat(temp);
249                }
250            }
251        }
252    }
253
254    // now we can be corrupted (it's the dest)
255    temp.flags |= CORRUPTIBLE;
256}
257
258void GGLAssembler::build_blend_factor(
259        integer_t& factor, int f, int component,
260        const pixel_t& dst_pixel,
261        integer_t& fragment,
262        integer_t& fb,
263        Scratch& scratches)
264{
265    integer_t src_alpha(fragment);
266
267    // src_factor/dst_factor won't be used after blending,
268    // so it's fine to mark them as CORRUPTIBLE (if not aliased)
269    factor.flags |= CORRUPTIBLE;
270
271    switch(f) {
272    case GGL_ONE_MINUS_SRC_ALPHA:
273    case GGL_SRC_ALPHA:
274        if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
275            // we're processing alpha, so we already have
276            // src-alpha in fragment, and we need src-alpha just this time.
277        } else {
278           // alpha-src will be needed for other components
279            if (!mBlendFactorCached || mBlendFactorCached==f) {
280                src_alpha = mAlphaSource;
281                factor = mAlphaSource;
282                factor.flags &= ~CORRUPTIBLE;
283                // we already computed the blend factor before, nothing to do.
284                if (mBlendFactorCached)
285                    return;
286                // this is the first time, make sure to compute the blend
287                // factor properly.
288                mBlendFactorCached = f;
289                break;
290            } else {
291                // we have a cached alpha blend factor, but we want another one,
292                // this should really not happen because by construction,
293                // we cannot have BOTH source and destination
294                // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
295                // the blending stage uses the f/(1-f) optimization
296
297                // for completeness, we handle this case though. Since there
298                // are only 2 choices, this meens we want "the other one"
299                // (1-factor)
300                factor = mAlphaSource;
301                factor.flags &= ~CORRUPTIBLE;
302                RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
303                mBlendFactorCached = f;
304                return;
305            }
306        }
307        // fall-through...
308    case GGL_ONE_MINUS_DST_COLOR:
309    case GGL_DST_COLOR:
310    case GGL_ONE_MINUS_SRC_COLOR:
311    case GGL_SRC_COLOR:
312    case GGL_ONE_MINUS_DST_ALPHA:
313    case GGL_DST_ALPHA:
314    case GGL_SRC_ALPHA_SATURATE:
315        // help us find out what register we can use for the blend-factor
316        // CORRUPTIBLE registers are chosen first, or a new one is allocated.
317        if (fragment.flags & CORRUPTIBLE) {
318            factor.setTo(fragment.reg, 32, CORRUPTIBLE);
319            fragment.flags &= ~CORRUPTIBLE;
320        } else if (fb.flags & CORRUPTIBLE) {
321            factor.setTo(fb.reg, 32, CORRUPTIBLE);
322            fb.flags &= ~CORRUPTIBLE;
323        } else {
324            factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
325        }
326        break;
327    }
328
329    // XXX: doesn't work if size==1
330
331    switch(f) {
332    case GGL_ONE_MINUS_DST_COLOR:
333    case GGL_DST_COLOR:
334        factor.s = fb.s;
335        ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
336        break;
337    case GGL_ONE_MINUS_SRC_COLOR:
338    case GGL_SRC_COLOR:
339        factor.s = fragment.s;
340        ADD(AL, 0, factor.reg, fragment.reg,
341            reg_imm(fragment.reg, LSR, fragment.s-1));
342        break;
343    case GGL_ONE_MINUS_SRC_ALPHA:
344    case GGL_SRC_ALPHA:
345        factor.s = src_alpha.s;
346        ADD(AL, 0, factor.reg, src_alpha.reg,
347                reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
348        break;
349    case GGL_ONE_MINUS_DST_ALPHA:
350    case GGL_DST_ALPHA:
351        // XXX: should be precomputed
352        extract(factor, dst_pixel, GGLFormat::ALPHA);
353        ADD(AL, 0, factor.reg, factor.reg,
354                reg_imm(factor.reg, LSR, factor.s-1));
355        break;
356    case GGL_SRC_ALPHA_SATURATE:
357        // XXX: should be precomputed
358        // XXX: f = min(As, 1-Ad)
359        // btw, we're guaranteed that Ad's size is <= 8, because
360        // it's extracted from the framebuffer
361        break;
362    }
363
364    switch(f) {
365    case GGL_ONE_MINUS_DST_COLOR:
366    case GGL_ONE_MINUS_SRC_COLOR:
367    case GGL_ONE_MINUS_DST_ALPHA:
368    case GGL_ONE_MINUS_SRC_ALPHA:
369        RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
370    }
371
372    // don't need more than 8-bits for the blend factor
373    // and this will prevent overflows in the multiplies later
374    if (factor.s > 8) {
375        MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
376        factor.s = 8;
377    }
378}
379
380int GGLAssembler::blending_codes(int fs, int fd)
381{
382    int blending = 0;
383    switch(fs) {
384    case GGL_ONE:
385        blending |= BLEND_SRC;
386        break;
387
388    case GGL_ONE_MINUS_DST_COLOR:
389    case GGL_DST_COLOR:
390        blending |= FACTOR_DST|BLEND_SRC;
391        break;
392    case GGL_ONE_MINUS_DST_ALPHA:
393    case GGL_DST_ALPHA:
394        // no need to extract 'component' from the destination
395        // for the blend factor, because we need ALPHA only.
396        blending |= BLEND_SRC;
397        break;
398
399    case GGL_ONE_MINUS_SRC_COLOR:
400    case GGL_SRC_COLOR:
401        blending |= FACTOR_SRC|BLEND_SRC;
402        break;
403    case GGL_ONE_MINUS_SRC_ALPHA:
404    case GGL_SRC_ALPHA:
405    case GGL_SRC_ALPHA_SATURATE:
406        blending |= FACTOR_SRC|BLEND_SRC;
407        break;
408    }
409    switch(fd) {
410    case GGL_ONE:
411        blending |= BLEND_DST;
412        break;
413
414    case GGL_ONE_MINUS_DST_COLOR:
415    case GGL_DST_COLOR:
416        blending |= FACTOR_DST|BLEND_DST;
417        break;
418    case GGL_ONE_MINUS_DST_ALPHA:
419    case GGL_DST_ALPHA:
420        blending |= FACTOR_DST|BLEND_DST;
421        break;
422
423    case GGL_ONE_MINUS_SRC_COLOR:
424    case GGL_SRC_COLOR:
425        blending |= FACTOR_SRC|BLEND_DST;
426        break;
427    case GGL_ONE_MINUS_SRC_ALPHA:
428    case GGL_SRC_ALPHA:
429        // no need to extract 'component' from the source
430        // for the blend factor, because we need ALPHA only.
431        blending |= BLEND_DST;
432        break;
433    }
434    return blending;
435}
436
437// ---------------------------------------------------------------------------
438
439void GGLAssembler::build_blendFOneMinusF(
440        component_t& temp,
441        const integer_t& factor,
442        const integer_t& fragment,
443        const integer_t& fb)
444{
445    //  R = S*f + D*(1-f) = (S-D)*f + D
446    Scratch scratches(registerFile());
447    // compute S-D
448    integer_t diff(fragment.flags & CORRUPTIBLE ?
449            fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
450    const int shift = fragment.size() - fb.size();
451    if (shift>0)        RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
452    else if (shift<0)   RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
453    else                RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
454    mul_factor_add(temp, diff, factor, component_t(fb));
455}
456
457void GGLAssembler::build_blendOneMinusFF(
458        component_t& temp,
459        const integer_t& factor,
460        const integer_t& fragment,
461        const integer_t& fb)
462{
463    //  R = S*f + D*(1-f) = (S-D)*f + D
464    Scratch scratches(registerFile());
465    // compute D-S
466    integer_t diff(fb.flags & CORRUPTIBLE ?
467            fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
468    const int shift = fragment.size() - fb.size();
469    if (shift>0)        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
470    else if (shift<0)   SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
471    else                SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
472    mul_factor_add(temp, diff, factor, component_t(fragment));
473}
474
475// ---------------------------------------------------------------------------
476
477void GGLAssembler::mul_factor(  component_t& d,
478                                const integer_t& v,
479                                const integer_t& f)
480{
481    int vs = v.size();
482    int fs = f.size();
483    int ms = vs+fs;
484
485    // XXX: we could have special cases for 1 bit mul
486
487    // all this code below to use the best multiply instruction
488    // wrt the parameters size. We take advantage of the fact
489    // that the 16-bits multiplies allow a 16-bit shift
490    // The trick is that we just make sure that we have at least 8-bits
491    // per component (which is enough for a 8 bits display).
492
493    int xy;
494    int vshift = 0;
495    int fshift = 0;
496    int smulw = 0;
497
498    if (vs<16) {
499        if (fs<16) {
500            xy = xyBB;
501        } else if (GGL_BETWEEN(fs, 24, 31)) {
502            ms -= 16;
503            xy = xyTB;
504        } else {
505            // eg: 15 * 18  ->  15 * 15
506            fshift = fs - 15;
507            ms -= fshift;
508            xy = xyBB;
509        }
510    } else if (GGL_BETWEEN(vs, 24, 31)) {
511        if (fs<16) {
512            ms -= 16;
513            xy = xyTB;
514        } else if (GGL_BETWEEN(fs, 24, 31)) {
515            ms -= 32;
516            xy = xyTT;
517        } else {
518            // eg: 24 * 18  ->  8 * 18
519            fshift = fs - 15;
520            ms -= 16 + fshift;
521            xy = xyTB;
522        }
523    } else {
524        if (fs<16) {
525            // eg: 18 * 15  ->  15 * 15
526            vshift = vs - 15;
527            ms -= vshift;
528            xy = xyBB;
529        } else if (GGL_BETWEEN(fs, 24, 31)) {
530            // eg: 18 * 24  ->  15 * 8
531            vshift = vs - 15;
532            ms -= 16 + vshift;
533            xy = xyBT;
534        } else {
535            // eg: 18 * 18  ->  (15 * 18)>>16
536            fshift = fs - 15;
537            ms -= 16 + fshift;
538            xy = yB;    //XXX SMULWB
539            smulw = 1;
540        }
541    }
542
543    LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
544
545    int vreg = v.reg;
546    int freg = f.reg;
547    if (vshift) {
548        MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
549        vreg = d.reg;
550    }
551    if (fshift) {
552        MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
553        freg = d.reg;
554    }
555    if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
556    else        SMUL(AL, xy, d.reg, vreg, freg);
557
558
559    d.h = ms;
560    if (mDithering) {
561        d.l = 0;
562    } else {
563        d.l = fs;
564        d.flags |= CLEAR_LO;
565    }
566}
567
568void GGLAssembler::mul_factor_add(  component_t& d,
569                                    const integer_t& v,
570                                    const integer_t& f,
571                                    const component_t& a)
572{
573    // XXX: we could have special cases for 1 bit mul
574    Scratch scratches(registerFile());
575
576    int vs = v.size();
577    int fs = f.size();
578    int as = a.h;
579    int ms = vs+fs;
580
581    LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
582
583    integer_t add(a.reg, a.h, a.flags);
584
585    // 'a' is a component_t but it is guaranteed to have
586    // its high bits set to 0. However in the dithering case,
587    // we can't get away with truncating the potentially bad bits
588    // so extraction is needed.
589
590   if ((mDithering) && (a.size() < ms)) {
591        // we need to expand a
592        if (!(a.flags & CORRUPTIBLE)) {
593            // ... but it's not corruptible, so we need to pick a
594            // temporary register.
595            // Try to uses the destination register first (it's likely
596            // to be usable, unless it aliases an input).
597            if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
598                add.reg = d.reg;
599            } else {
600                add.reg = scratches.obtain();
601            }
602        }
603        expand(add, a, ms); // extracts and expands
604        as = ms;
605    }
606
607    if (ms == as) {
608        if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
609        else                MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
610    } else {
611        int temp = d.reg;
612        if (temp == add.reg) {
613            // the mul will modify add.reg, we need an intermediary reg
614            if (v.flags & CORRUPTIBLE)      temp = v.reg;
615            else if (f.flags & CORRUPTIBLE) temp = f.reg;
616            else                            temp = scratches.obtain();
617        }
618
619        if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
620        else                MUL(AL, 0, temp, v.reg, f.reg);
621
622        if (ms>as) {
623            ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
624        } else if (ms<as) {
625            // not sure if we should expand the mul instead?
626            ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
627        }
628    }
629
630    d.h = ms;
631    if (mDithering) {
632        d.l = a.l;
633    } else {
634        d.l = fs>a.l ? fs : a.l;
635        d.flags |= CLEAR_LO;
636    }
637}
638
639void GGLAssembler::component_add(component_t& d,
640        const integer_t& dst, const integer_t& src)
641{
642    // here we're guaranteed that fragment.size() >= fb.size()
643    const int shift = src.size() - dst.size();
644    if (!shift) {
645        ADD(AL, 0, d.reg, src.reg, dst.reg);
646    } else {
647        ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
648    }
649
650    d.h = src.size();
651    if (mDithering) {
652        d.l = 0;
653    } else {
654        d.l = shift;
655        d.flags |= CLEAR_LO;
656    }
657}
658
659void GGLAssembler::component_sat(const component_t& v)
660{
661    const int one = ((1<<v.size())-1)<<v.l;
662    CMP(AL, v.reg, imm( 1<<v.h ));
663    if (isValidImmediate(one)) {
664        MOV(HS, 0, v.reg, imm( one ));
665    } else if (isValidImmediate(~one)) {
666        MVN(HS, 0, v.reg, imm( ~one ));
667    } else {
668        MOV(HS, 0, v.reg, imm( 1<<v.h ));
669        SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
670    }
671}
672
673// ----------------------------------------------------------------------------
674
675}; // namespace android
676
677