1/* libs/pixelflinger/codeflinger/load_store.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#define LOG_TAG "pixelflinger-code"
19
20#include <assert.h>
21#include <stdio.h>
22
23#include <log/log.h>
24
25#include "GGLAssembler.h"
26
27namespace android {
28
29// ----------------------------------------------------------------------------
30
31void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
32{
33    const int bits = addr.size;
34    const int inc = (flags & WRITE_BACK)?1:0;
35    switch (bits) {
36    case 32:
37        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
38        else        STR(AL, s.reg, addr.reg);
39        break;
40    case 24:
41        // 24 bits formats are a little special and used only for RGB
42        // 0x00BBGGRR is unpacked as R,G,B
43        STRB(AL, s.reg, addr.reg, immed12_pre(0));
44        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45        STRB(AL, s.reg, addr.reg, immed12_pre(1));
46        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
47        STRB(AL, s.reg, addr.reg, immed12_pre(2));
48        if (!(s.flags & CORRUPTIBLE)) {
49            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
50        }
51        if (inc)
52            ADD(AL, 0, addr.reg, addr.reg, imm(3));
53        break;
54    case 16:
55        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
56        else        STRH(AL, s.reg, addr.reg);
57        break;
58    case  8:
59        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
60        else        STRB(AL, s.reg, addr.reg);
61        break;
62    }
63}
64
65void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
66{
67    Scratch scratches(registerFile());
68    int s0;
69
70    const int bits = addr.size;
71    const int inc = (flags & WRITE_BACK)?1:0;
72    switch (bits) {
73    case 32:
74        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
75        else        LDR(AL, s.reg, addr.reg);
76        break;
77    case 24:
78        // 24 bits formats are a little special and used only for RGB
79        // R,G,B is packed as 0x00BBGGRR
80        s0 = scratches.obtain();
81        if (s.reg != addr.reg) {
82            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
83            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
84            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
85            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
86            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
87        } else {
88            int s1 = scratches.obtain();
89            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
90            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
91            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
92            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
93            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
94        }
95        if (inc)
96            ADD(AL, 0, addr.reg, addr.reg, imm(3));
97        break;
98    case 16:
99        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
100        else        LDRH(AL, s.reg, addr.reg);
101        break;
102    case  8:
103        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
104        else        LDRB(AL, s.reg, addr.reg);
105        break;
106    }
107}
108
109void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
110{
111    const int maskLen = h-l;
112
113#ifdef __mips__
114    assert(maskLen<=11);
115#else
116    assert(maskLen<=8);
117#endif
118    assert(h);
119
120    if (h != bits) {
121        const int mask = ((1<<maskLen)-1) << l;
122        if (isValidImmediate(mask)) {
123            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
124        } else if (isValidImmediate(~mask)) {
125            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
126        } else {
127            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
128            l += 32-h;
129            h = 32;
130        }
131        s = d.reg;
132    }
133
134    if (l) {
135        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
136        s = d.reg;
137    }
138
139    if (s != d.reg) {
140        MOV(AL, 0, d.reg, s);
141    }
142
143    d.s = maskLen;
144}
145
146void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
147{
148    extract(d,  s.reg,
149                s.format.c[component].h,
150                s.format.c[component].l,
151                s.size());
152}
153
154void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
155{
156    integer_t r(d.reg, 32, d.flags);
157    extract(r,  s.reg,
158                s.format.c[component].h,
159                s.format.c[component].l,
160                s.size());
161    d = component_t(r);
162}
163
164
165void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
166{
167    if (s.l || (s.flags & CLEAR_HI)) {
168        extract(d, s.reg, s.h, s.l, 32);
169        expand(d, d, dbits);
170    } else {
171        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
172    }
173}
174
175void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
176{
177    integer_t r(d.reg, 32, d.flags);
178    expand(r, s, dbits);
179    d = component_t(r);
180}
181
182void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
183{
184    assert(src.size());
185
186    int sbits = src.size();
187    int s = src.reg;
188    int d = dst.reg;
189
190    // be sure to set 'dst' after we read 'src' as they may be identical
191    dst.s = dbits;
192    dst.flags = 0;
193
194    if (dbits<=sbits) {
195        if (s != d) {
196            MOV(AL, 0, d, s);
197        }
198        return;
199    }
200
201    if (sbits == 1) {
202        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
203            // d = (s<<dbits) - s;
204        return;
205    }
206
207    if (dbits % sbits) {
208        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
209            // d = s << (dbits-sbits);
210        dbits -= sbits;
211        do {
212            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
213                // d |= d >> sbits;
214            dbits -= sbits;
215            sbits *= 2;
216        } while(dbits>0);
217        return;
218    }
219
220    dbits -= sbits;
221    do {
222        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
223            // d |= d<<sbits;
224        s = d;
225        dbits -= sbits;
226        if (sbits*2 < dbits) {
227            sbits *= 2;
228        }
229    } while(dbits>0);
230}
231
232void GGLAssembler::downshift(
233        pixel_t& d, int component, component_t s, const reg_t& dither)
234{
235    const needs_t& needs = mBuilderContext.needs;
236    Scratch scratches(registerFile());
237
238    int sh = s.h;
239    int sl = s.l;
240    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
241    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
242    int sbits = sh - sl;
243
244    int dh = d.format.c[component].h;
245    int dl = d.format.c[component].l;
246    int dbits = dh - dl;
247    int dithering = 0;
248
249    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
250
251    if (sbits>dbits) {
252        // see if we need to dither
253        dithering = mDithering;
254    }
255
256    int ireg = d.reg;
257    if (!(d.flags & FIRST)) {
258        if (s.flags & CORRUPTIBLE)  {
259            ireg = s.reg;
260        } else {
261            ireg = scratches.obtain();
262        }
263    }
264    d.flags &= ~FIRST;
265
266    if (maskHiBits) {
267        // we need to mask the high bits (and possibly the lowbits too)
268        // and we might be able to use immediate mask.
269        if (!dithering) {
270            // we don't do this if we only have maskLoBits because we can
271            // do it more efficiently below (in the case where dl=0)
272            const int offset = sh - dbits;
273            if (dbits<=8 && offset >= 0) {
274                const uint32_t mask = ((1<<dbits)-1) << offset;
275                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
276                    build_and_immediate(ireg, s.reg, mask, 32);
277                    sl = offset;
278                    s.reg = ireg;
279                    sbits = dbits;
280                    maskLoBits = maskHiBits = 0;
281                }
282            }
283        } else {
284            // in the dithering case though, we need to preserve the lower bits
285            const uint32_t mask = ((1<<sbits)-1) << sl;
286            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
287                build_and_immediate(ireg, s.reg, mask, 32);
288                s.reg = ireg;
289                maskLoBits = maskHiBits = 0;
290            }
291        }
292    }
293
294    // XXX: we could special case (maskHiBits & !maskLoBits)
295    // like we do for maskLoBits below, but it happens very rarely
296    // that we have maskHiBits only and the conditions necessary to lead
297    // to better code (like doing d |= s << 24)
298
299    if (maskHiBits) {
300        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
301        sl += 32-sh;
302        sh = 32;
303        s.reg = ireg;
304        maskHiBits = 0;
305    }
306
307    //	Downsampling should be performed as follows:
308    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
309    //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
310    //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
311    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
312    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
313    //
314    //	By approximating (1>>dbits) and (1>>sbits) to 0:
315    //
316    //		V>>(sbits-dbits)	-	V>>sbits
317    //
318	//  A good approximation is V>>(sbits-dbits),
319    //  but better one (needed for dithering) is:
320    //
321    //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
322    //		(V<<dbits	-	V)>>sbits
323    //		(V	-	V>>dbits)>>(sbits-dbits)
324
325    // Dithering is done here
326    if (dithering) {
327        comment("dithering");
328        if (sl) {
329            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
330            sh -= sl;
331            sl = 0;
332            s.reg = ireg;
333        }
334        // scaling (V-V>>dbits)
335        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
336        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
337        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
338        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
339        else                ADD(AL, 0, ireg, ireg, dither.reg);
340        s.reg = ireg;
341    }
342
343    if ((maskLoBits|dithering) && (sh > dbits)) {
344        int shift = sh-dbits;
345        if (dl) {
346            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
347            if (ireg == d.reg) {
348                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
349            } else {
350                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
351            }
352        } else {
353            if (ireg == d.reg) {
354                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
355            } else {
356                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
357            }
358        }
359    } else {
360        int shift = sh-dh;
361        if (shift>0) {
362            if (ireg == d.reg) {
363                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
364            } else {
365                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
366            }
367        } else if (shift<0) {
368            if (ireg == d.reg) {
369                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
370            } else {
371                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
372            }
373        } else {
374            if (ireg == d.reg) {
375                if (s.reg != d.reg) {
376                    MOV(AL, 0, d.reg, s.reg);
377                }
378            } else {
379                ORR(AL, 0, d.reg, d.reg, s.reg);
380            }
381        }
382    }
383}
384
385}; // namespace android
386