1/* libs/pixelflinger/codeflinger/load_store.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <assert.h>
19#include <stdio.h>
20#include <cutils/log.h>
21#include "codeflinger/GGLAssembler.h"
22
23#ifdef __ARM_ARCH__
24#include <machine/cpu-features.h>
25#endif
26
27namespace android {
28
29// ----------------------------------------------------------------------------
30
// Emit code that stores pixel 's' at the address held in 'addr.reg'.
// 'addr.size' gives the pixel depth in bits (32, 24, 16 or 8) and selects
// the store instruction(s). If 'flags' has WRITE_BACK set, 'addr.reg' is
// advanced past the pixel afterwards (post-increment addressing, or an
// explicit ADD in the 24-bit case).
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // Store the three bytes one at a time, rotating the source register
        // right by 8 between stores to bring the next byte into position.
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // s.reg has been rotated by 16 in total; rotate by another 16
            // to restore its original value for the caller.
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}
64
// Emit code that loads a pixel from the address held in 'addr.reg' into
// register s.reg. 'addr.size' gives the pixel depth in bits (32, 24, 16
// or 8). If 'flags' has WRITE_BACK set, 'addr.reg' is advanced past the
// pixel afterwards.
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            // destination and address registers are distinct: assemble
            // the pixel directly in s.reg
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // s.reg aliases addr.reg: assemble the pixel in scratch s1
            // so the address stays valid for all three byte loads, and
            // only write s.reg with the final ORR.
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
108
// Emit code that extracts the bit-field [l, h) of register 's' into
// d.reg, right-justified:  component = (packed >> l) & ((1<<(h-l))-1).
// 'bits' is the total width of the packed value held in 's'.
// On exit, d.s is set to the extracted field's width (h-l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    // the MIPS build permits wider fields here than the ARM path
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        // field occupies the whole register: a plain move suffices
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        // field is aligned at the top of the register: one right shift
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        // general case: ARMv7 unsigned bit-field extract
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        // field is not at the top of the register: clear the high bits,
        // with an immediate AND/BIC when the mask encodes, otherwise by
        // shifting the field up to the top of the register.
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        // right-justify the field
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // nothing was emitted above; copy the value into place
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
160
161void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
162{
163    extract(d,  s.reg,
164                s.format.c[component].h,
165                s.format.c[component].l,
166                s.size());
167}
168
169void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
170{
171    integer_t r(d.reg, 32, d.flags);
172    extract(r,  s.reg,
173                s.format.c[component].h,
174                s.format.c[component].l,
175                s.size());
176    d = component_t(r);
177}
178
179
180void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
181{
182    if (s.l || (s.flags & CLEAR_HI)) {
183        extract(d, s.reg, s.h, s.l, 32);
184        expand(d, d, dbits);
185    } else {
186        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
187    }
188}
189
190void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
191{
192    integer_t r(d.reg, 32, d.flags);
193    expand(r, s, dbits);
194    d = component_t(r);
195}
196
// Emit code that expands the sbits-wide value in 'src' to 'dbits' bits
// in 'dst' by bit replication — the standard way to up-scale a color
// component (e.g. 5-bit 0x1F expands to 8-bit 0xFF).
// On exit, dst.s = dbits and dst.flags = 0.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // no expansion needed; just make sure the value ends up in d
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // 1-bit source: 0 -> 0, 1 -> all ones, as s * ((1<<dbits)-1)
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // source width doesn't divide the destination width: shift the
        // value to the top, then OR in right-shifted copies of itself,
        // doubling the replicated width each round.
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // destination width is a multiple of the source width: replicate
    // upwards with left shifts, doubling the replicated width while it
    // still fits in the remaining bits.
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
246
247void GGLAssembler::downshift(
248        pixel_t& d, int component, component_t s, const reg_t& dither)
249{
250    const needs_t& needs = mBuilderContext.needs;
251    Scratch scratches(registerFile());
252
253    int sh = s.h;
254    int sl = s.l;
255    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
256    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
257    int sbits = sh - sl;
258
259    int dh = d.format.c[component].h;
260    int dl = d.format.c[component].l;
261    int dbits = dh - dl;
262    int dithering = 0;
263
264    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
265
266    if (sbits>dbits) {
267        // see if we need to dither
268        dithering = mDithering;
269    }
270
271    int ireg = d.reg;
272    if (!(d.flags & FIRST)) {
273        if (s.flags & CORRUPTIBLE)  {
274            ireg = s.reg;
275        } else {
276            ireg = scratches.obtain();
277        }
278    }
279    d.flags &= ~FIRST;
280
281    if (maskHiBits) {
282        // we need to mask the high bits (and possibly the lowbits too)
283        // and we might be able to use immediate mask.
284        if (!dithering) {
285            // we don't do this if we only have maskLoBits because we can
286            // do it more efficiently below (in the case where dl=0)
287            const int offset = sh - dbits;
288            if (dbits<=8 && offset >= 0) {
289                const uint32_t mask = ((1<<dbits)-1) << offset;
290                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
291                    build_and_immediate(ireg, s.reg, mask, 32);
292                    sl = offset;
293                    s.reg = ireg;
294                    sbits = dbits;
295                    maskLoBits = maskHiBits = 0;
296                }
297            }
298        } else {
299            // in the dithering case though, we need to preserve the lower bits
300            const uint32_t mask = ((1<<sbits)-1) << sl;
301            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
302                build_and_immediate(ireg, s.reg, mask, 32);
303                s.reg = ireg;
304                maskLoBits = maskHiBits = 0;
305            }
306        }
307    }
308
309    // XXX: we could special case (maskHiBits & !maskLoBits)
310    // like we do for maskLoBits below, but it happens very rarely
311    // that we have maskHiBits only and the conditions necessary to lead
312    // to better code (like doing d |= s << 24)
313
314    if (maskHiBits) {
315        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
316        sl += 32-sh;
317        sh = 32;
318        s.reg = ireg;
319        maskHiBits = 0;
320    }
321
322    //	Downsampling should be performed as follows:
323    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
324    //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
325    //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
326    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
327    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
328    //
329    //	By approximating (1>>dbits) and (1>>sbits) to 0:
330    //
331    //		V>>(sbits-dbits)	-	V>>sbits
332    //
333	//  A good approximation is V>>(sbits-dbits),
334    //  but better one (needed for dithering) is:
335    //
336    //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
337    //		(V<<dbits	-	V)>>sbits
338    //		(V	-	V>>dbits)>>(sbits-dbits)
339
340    // Dithering is done here
341    if (dithering) {
342        comment("dithering");
343        if (sl) {
344            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
345            sh -= sl;
346            sl = 0;
347            s.reg = ireg;
348        }
349        // scaling (V-V>>dbits)
350        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
351        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
352        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
353        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
354        else                ADD(AL, 0, ireg, ireg, dither.reg);
355        s.reg = ireg;
356    }
357
358    if ((maskLoBits|dithering) && (sh > dbits)) {
359        int shift = sh-dbits;
360        if (dl) {
361            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
362            if (ireg == d.reg) {
363                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
364            } else {
365                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
366            }
367        } else {
368            if (ireg == d.reg) {
369                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
370            } else {
371                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
372            }
373        }
374    } else {
375        int shift = sh-dh;
376        if (shift>0) {
377            if (ireg == d.reg) {
378                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
379            } else {
380                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
381            }
382        } else if (shift<0) {
383            if (ireg == d.reg) {
384                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
385            } else {
386                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
387            }
388        } else {
389            if (ireg == d.reg) {
390                if (s.reg != d.reg) {
391                    MOV(AL, 0, d.reg, s.reg);
392                }
393            } else {
394                ORR(AL, 0, d.reg, d.reg, s.reg);
395            }
396        }
397    }
398}
399
400}; // namespace android
401