load_store.cpp revision 01dda204cd28fe181691b4a44a51be7e5666d0c8
1/* libs/pixelflinger/codeflinger/load_store.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <assert.h>
19#include <stdio.h>
20#include <cutils/log.h>
21#include "codeflinger/GGLAssembler.h"
22
23#ifdef __ARM_ARCH__
24#include <machine/cpu-features.h>
25#endif
26
27namespace android {
28
29// ----------------------------------------------------------------------------
30
31void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
32{
33    const int bits = addr.size;
34    const int inc = (flags & WRITE_BACK)?1:0;
35    switch (bits) {
36    case 32:
37        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
38        else        STR(AL, s.reg, addr.reg);
39        break;
40    case 24:
41        // 24 bits formats are a little special and used only for RGB
42        // 0x00BBGGRR is unpacked as R,G,B
43        STRB(AL, s.reg, addr.reg, immed12_pre(0));
44        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45        STRB(AL, s.reg, addr.reg, immed12_pre(1));
46        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
47        STRB(AL, s.reg, addr.reg, immed12_pre(2));
48        if (!(s.flags & CORRUPTIBLE)) {
49            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
50        }
51        if (inc)
52            ADD(AL, 0, addr.reg, addr.reg, imm(3));
53        break;
54    case 16:
55        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
56        else        STRH(AL, s.reg, addr.reg);
57        break;
58    case  8:
59        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
60        else        STRB(AL, s.reg, addr.reg);
61        break;
62    }
63}
64
65void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
66{
67    Scratch scratches(registerFile());
68    int s0;
69
70    const int bits = addr.size;
71    const int inc = (flags & WRITE_BACK)?1:0;
72    switch (bits) {
73    case 32:
74        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
75        else        LDR(AL, s.reg, addr.reg);
76        break;
77    case 24:
78        // 24 bits formats are a little special and used only for RGB
79        // R,G,B is packed as 0x00BBGGRR
80        s0 = scratches.obtain();
81        if (s.reg != addr.reg) {
82            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
83            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
84            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
85            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
86            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
87        } else {
88            int s1 = scratches.obtain();
89            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
90            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
91            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
92            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
93            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
94        }
95        if (inc)
96            ADD(AL, 0, addr.reg, addr.reg, imm(3));
97        break;
98    case 16:
99        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
100        else        LDRH(AL, s.reg, addr.reg);
101        break;
102    case  8:
103        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
104        else        LDRB(AL, s.reg, addr.reg);
105        break;
106    }
107}
108
// Extract the bit-field [l, h) of source register 's' into d.reg,
// right-justified. 'bits' is the total significant size of the packed
// source value (h == bits means no bits above the field). The field
// width (h-l) must be at most 8 bits.
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    // color components are at most 8 bits wide
    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    // prefer single MOV/AND/BIC when the operand encodes, else UBFX
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        // general case: ARMv7 bit-field extract in one instruction
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        // bits exist above the field: clear them with an immediate
        // AND/BIC when the mask encodes, otherwise shift the field
        // up against bit 31 (and adjust l/h accordingly)
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    // right-justify the field
    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    // nothing emitted above: plain register copy into the destination
    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
156
157void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
158{
159    extract(d,  s.reg,
160                s.format.c[component].h,
161                s.format.c[component].l,
162                s.size());
163}
164
165void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
166{
167    integer_t r(d.reg, 32, d.flags);
168    extract(r,  s.reg,
169                s.format.c[component].h,
170                s.format.c[component].l,
171                s.size());
172    d = component_t(r);
173}
174
175
176void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
177{
178    if (s.l || (s.flags & CLEAR_HI)) {
179        extract(d, s.reg, s.h, s.l, 32);
180        expand(d, d, dbits);
181    } else {
182        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
183    }
184}
185
186void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
187{
188    integer_t r(d.reg, 32, d.flags);
189    expand(r, s, dbits);
190    d = component_t(r);
191}
192
// Expand 'src' (src.size() significant bits) to 'dbits' bits into
// 'dst' by replicating the source bit pattern downward, so that e.g. a
// maximal 5-bit value 0x1F becomes a maximal 8-bit value 0xFF. 'src'
// and 'dst' may name the same register.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // no expansion needed; just copy if the registers differ
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // single source bit: 0 stays 0, 1 becomes dbits ones
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits not a multiple of sbits: place the source at the top of
        // the destination field, then replicate downward, doubling the
        // replicated width each pass
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: replicate upward
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d = s | (s<<sbits);
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
242
243void GGLAssembler::downshift(
244        pixel_t& d, int component, component_t s, const reg_t& dither)
245{
246    const needs_t& needs = mBuilderContext.needs;
247    Scratch scratches(registerFile());
248
249    int sh = s.h;
250    int sl = s.l;
251    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
252    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
253    int sbits = sh - sl;
254
255    int dh = d.format.c[component].h;
256    int dl = d.format.c[component].l;
257    int dbits = dh - dl;
258    int dithering = 0;
259
260    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
261
262    if (sbits>dbits) {
263        // see if we need to dither
264        dithering = mDithering;
265    }
266
267    int ireg = d.reg;
268    if (!(d.flags & FIRST)) {
269        if (s.flags & CORRUPTIBLE)  {
270            ireg = s.reg;
271        } else {
272            ireg = scratches.obtain();
273        }
274    }
275    d.flags &= ~FIRST;
276
277    if (maskHiBits) {
278        // we need to mask the high bits (and possibly the lowbits too)
279        // and we might be able to use immediate mask.
280        if (!dithering) {
281            // we don't do this if we only have maskLoBits because we can
282            // do it more efficiently below (in the case where dl=0)
283            const int offset = sh - dbits;
284            if (dbits<=8 && offset >= 0) {
285                const uint32_t mask = ((1<<dbits)-1) << offset;
286                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
287                    build_and_immediate(ireg, s.reg, mask, 32);
288                    sl = offset;
289                    s.reg = ireg;
290                    sbits = dbits;
291                    maskLoBits = maskHiBits = 0;
292                }
293            }
294        } else {
295            // in the dithering case though, we need to preserve the lower bits
296            const uint32_t mask = ((1<<sbits)-1) << sl;
297            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
298                build_and_immediate(ireg, s.reg, mask, 32);
299                s.reg = ireg;
300                maskLoBits = maskHiBits = 0;
301            }
302        }
303    }
304
305    // XXX: we could special case (maskHiBits & !maskLoBits)
306    // like we do for maskLoBits below, but it happens very rarely
307    // that we have maskHiBits only and the conditions necessary to lead
308    // to better code (like doing d |= s << 24)
309
310    if (maskHiBits) {
311        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
312        sl += 32-sh;
313        sh = 32;
314        s.reg = ireg;
315        maskHiBits = 0;
316    }
317
318    //	Downsampling should be performed as follows:
319    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
320    //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
321    //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
322    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
323    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
324    //
325    //	By approximating (1>>dbits) and (1>>sbits) to 0:
326    //
327    //		V>>(sbits-dbits)	-	V>>sbits
328    //
329	//  A good approximation is V>>(sbits-dbits),
330    //  but better one (needed for dithering) is:
331    //
332    //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
333    //		(V<<dbits	-	V)>>sbits
334    //		(V	-	V>>dbits)>>(sbits-dbits)
335
336    // Dithering is done here
337    if (dithering) {
338        comment("dithering");
339        if (sl) {
340            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
341            sh -= sl;
342            sl = 0;
343            s.reg = ireg;
344        }
345        // scaling (V-V>>dbits)
346        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
347        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
348        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
349        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
350        else                ADD(AL, 0, ireg, ireg, dither.reg);
351        s.reg = ireg;
352    }
353
354    if ((maskLoBits|dithering) && (sh > dbits)) {
355        int shift = sh-dbits;
356        if (dl) {
357            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
358            if (ireg == d.reg) {
359                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
360            } else {
361                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
362            }
363        } else {
364            if (ireg == d.reg) {
365                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
366            } else {
367                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
368            }
369        }
370    } else {
371        int shift = sh-dh;
372        if (shift>0) {
373            if (ireg == d.reg) {
374                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
375            } else {
376                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
377            }
378        } else if (shift<0) {
379            if (ireg == d.reg) {
380                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
381            } else {
382                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
383            }
384        } else {
385            if (ireg == d.reg) {
386                if (s.reg != d.reg) {
387                    MOV(AL, 0, d.reg, s.reg);
388                }
389            } else {
390                ORR(AL, 0, d.reg, d.reg, s.reg);
391            }
392        }
393    }
394}
395
396}; // namespace android
397