load_store.cpp revision 4dc1fa8e8d1f14868ab8bba93a8cbb87f847c4e3
/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
18#include <assert.h>
19#include <stdio.h>
20#include <cutils/log.h>
21#include "codeflinger/GGLAssembler.h"
22
23#include <machine/cpu-features.h>
24
25namespace android {
26
27// ----------------------------------------------------------------------------
28
29void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
30{
31    const int bits = addr.size;
32    const int inc = (flags & WRITE_BACK)?1:0;
33    switch (bits) {
34    case 32:
35        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
36        else        STR(AL, s.reg, addr.reg);
37        break;
38    case 24:
39        // 24 bits formats are a little special and used only for RGB
40        // 0x00BBGGRR is unpacked as R,G,B
41        STRB(AL, s.reg, addr.reg, immed12_pre(0));
42        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
43        STRB(AL, s.reg, addr.reg, immed12_pre(1));
44        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45        STRB(AL, s.reg, addr.reg, immed12_pre(2));
46        if (!(s.flags & CORRUPTIBLE)) {
47            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
48        }
49        if (inc)
50            ADD(AL, 0, addr.reg, addr.reg, imm(3));
51        break;
52    case 16:
53        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
54        else        STRH(AL, s.reg, addr.reg);
55        break;
56    case  8:
57        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
58        else        STRB(AL, s.reg, addr.reg);
59        break;
60    }
61}
62
63void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
64{
65    Scratch scratches(registerFile());
66    int s0;
67
68    const int bits = addr.size;
69    const int inc = (flags & WRITE_BACK)?1:0;
70    switch (bits) {
71    case 32:
72        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
73        else        LDR(AL, s.reg, addr.reg);
74        break;
75    case 24:
76        // 24 bits formats are a little special and used only for RGB
77        // R,G,B is packed as 0x00BBGGRR
78        s0 = scratches.obtain();
79        if (s.reg != addr.reg) {
80            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
81            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
82            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
83            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
84            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
85        } else {
86            int s1 = scratches.obtain();
87            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
88            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
89            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
90            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
91            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
92        }
93        if (inc)
94            ADD(AL, 0, addr.reg, addr.reg, imm(3));
95        break;
96    case 16:
97        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
98        else        LDRH(AL, s.reg, addr.reg);
99        break;
100    case  8:
101        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
102        else        LDRB(AL, s.reg, addr.reg);
103        break;
104    }
105}
106
// Emit code extracting the bit-field [l..h) of register 's' into d.reg,
// right-justified. 'bits' is the total width in bits of the packed value
// held in 's' (lets us skip masking when the field reaches the top bit).
// 's' and 'd.reg' may be the same register. On return d.s holds the field
// width (h-l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    // pixel components handled here are at most 8 bits wide
    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        // general case: ARMv7 bit-field extract does it in one instruction
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        // the field does not reach the top bit: clear the high bits with
        // an immediate mask when encodable, otherwise by shifting the
        // field up against bit 31 (adjusting h and l to match)
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    // right-justify the field
    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    // if no instruction was emitted above, at least copy into destination
    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
154
155void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
156{
157    extract(d,  s.reg,
158                s.format.c[component].h,
159                s.format.c[component].l,
160                s.size());
161}
162
163void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
164{
165    integer_t r(d.reg, 32, d.flags);
166    extract(r,  s.reg,
167                s.format.c[component].h,
168                s.format.c[component].l,
169                s.size());
170    d = component_t(r);
171}
172
173
174void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
175{
176    if (s.l || (s.flags & CLEAR_HI)) {
177        extract(d, s.reg, s.h, s.l, 32);
178        expand(d, d, dbits);
179    } else {
180        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
181    }
182}
183
184void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
185{
186    integer_t r(d.reg, 32, d.flags);
187    expand(r, s, dbits);
188    d = component_t(r);
189}
190
// Emit code widening 'src' (src.size() significant bits, right-justified)
// into 'dst' as a dbits-wide value, replicating the source bits so that
// full-scale maps to full-scale (e.g. 5-bit 0x1F expands to 8-bit 0xFF).
// 'src' and 'dst' may share a register. On return dst.s == dbits.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // no expansion needed; just move into place if necessary
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // a single bit expands to all-ones or all-zeroes
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // destination width is not a multiple of the source width:
        // shift the source to the top of the field, then fold copies
        // of it down until the field is filled
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // destination width is a multiple of the source width: replicate the
    // source pattern upward, doubling the replicated width as we go
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d = s | (s << sbits);
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
240
241void GGLAssembler::downshift(
242        pixel_t& d, int component, component_t s, const reg_t& dither)
243{
244    const needs_t& needs = mBuilderContext.needs;
245    Scratch scratches(registerFile());
246
247    int sh = s.h;
248    int sl = s.l;
249    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
250    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
251    int sbits = sh - sl;
252
253    int dh = d.format.c[component].h;
254    int dl = d.format.c[component].l;
255    int dbits = dh - dl;
256    int dithering = 0;
257
258    LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
259
260    if (sbits>dbits) {
261        // see if we need to dither
262        dithering = mDithering;
263    }
264
265    int ireg = d.reg;
266    if (!(d.flags & FIRST)) {
267        if (s.flags & CORRUPTIBLE)  {
268            ireg = s.reg;
269        } else {
270            ireg = scratches.obtain();
271        }
272    }
273    d.flags &= ~FIRST;
274
275    if (maskHiBits) {
276        // we need to mask the high bits (and possibly the lowbits too)
277        // and we might be able to use immediate mask.
278        if (!dithering) {
279            // we don't do this if we only have maskLoBits because we can
280            // do it more efficiently below (in the case where dl=0)
281            const int offset = sh - dbits;
282            if (dbits<=8 && offset >= 0) {
283                const uint32_t mask = ((1<<dbits)-1) << offset;
284                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
285                    build_and_immediate(ireg, s.reg, mask, 32);
286                    sl = offset;
287                    s.reg = ireg;
288                    sbits = dbits;
289                    maskLoBits = maskHiBits = 0;
290                }
291            }
292        } else {
293            // in the dithering case though, we need to preserve the lower bits
294            const uint32_t mask = ((1<<sbits)-1) << sl;
295            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
296                build_and_immediate(ireg, s.reg, mask, 32);
297                s.reg = ireg;
298                maskLoBits = maskHiBits = 0;
299            }
300        }
301    }
302
303    // XXX: we could special case (maskHiBits & !maskLoBits)
304    // like we do for maskLoBits below, but it happens very rarely
305    // that we have maskHiBits only and the conditions necessary to lead
306    // to better code (like doing d |= s << 24)
307
308    if (maskHiBits) {
309        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
310        sl += 32-sh;
311        sh = 32;
312        s.reg = ireg;
313        maskHiBits = 0;
314    }
315
316    //	Downsampling should be performed as follows:
317    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
318    //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
319    //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
320    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
321    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
322    //
323    //	By approximating (1>>dbits) and (1>>sbits) to 0:
324    //
325    //		V>>(sbits-dbits)	-	V>>sbits
326    //
327	//  A good approximation is V>>(sbits-dbits),
328    //  but better one (needed for dithering) is:
329    //
330    //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
331    //		(V<<dbits	-	V)>>sbits
332    //		(V	-	V>>dbits)>>(sbits-dbits)
333
334    // Dithering is done here
335    if (dithering) {
336        comment("dithering");
337        if (sl) {
338            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
339            sh -= sl;
340            sl = 0;
341            s.reg = ireg;
342        }
343        // scaling (V-V>>dbits)
344        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
345        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
346        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
347        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
348        else                ADD(AL, 0, ireg, ireg, dither.reg);
349        s.reg = ireg;
350    }
351
352    if ((maskLoBits|dithering) && (sh > dbits)) {
353        int shift = sh-dbits;
354        if (dl) {
355            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
356            if (ireg == d.reg) {
357                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
358            } else {
359                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
360            }
361        } else {
362            if (ireg == d.reg) {
363                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
364            } else {
365                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
366            }
367        }
368    } else {
369        int shift = sh-dh;
370        if (shift>0) {
371            if (ireg == d.reg) {
372                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
373            } else {
374                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
375            }
376        } else if (shift<0) {
377            if (ireg == d.reg) {
378                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
379            } else {
380                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
381            }
382        } else {
383            if (ireg == d.reg) {
384                if (s.reg != d.reg) {
385                    MOV(AL, 0, d.reg, s.reg);
386                }
387            } else {
388                ORR(AL, 0, d.reg, d.reg, s.reg);
389            }
390        }
391    }
392}
393
394}; // namespace android
395