1/* libs/pixelflinger/codeflinger/load_store.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <assert.h>
19#include <stdio.h>
20#include <cutils/log.h>
21
22#include "codeflinger/GGLAssembler.h"
23
24namespace android {
25
26// ----------------------------------------------------------------------------
27
28void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
29{
30    const int bits = addr.size;
31    const int inc = (flags & WRITE_BACK)?1:0;
32    switch (bits) {
33    case 32:
34        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
35        else        STR(AL, s.reg, addr.reg);
36        break;
37    case 24:
38        // 24 bits formats are a little special and used only for RGB
39        // 0x00BBGGRR is unpacked as R,G,B
40        STRB(AL, s.reg, addr.reg, immed12_pre(0));
41        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
42        STRB(AL, s.reg, addr.reg, immed12_pre(1));
43        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
44        STRB(AL, s.reg, addr.reg, immed12_pre(2));
45        if (!(s.flags & CORRUPTIBLE)) {
46            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
47        }
48        if (inc)
49            ADD(AL, 0, addr.reg, addr.reg, imm(3));
50        break;
51    case 16:
52        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
53        else        STRH(AL, s.reg, addr.reg);
54        break;
55    case  8:
56        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
57        else        STRB(AL, s.reg, addr.reg);
58        break;
59    }
60}
61
62void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
63{
64    Scratch scratches(registerFile());
65    int s0;
66
67    const int bits = addr.size;
68    const int inc = (flags & WRITE_BACK)?1:0;
69    switch (bits) {
70    case 32:
71        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
72        else        LDR(AL, s.reg, addr.reg);
73        break;
74    case 24:
75        // 24 bits formats are a little special and used only for RGB
76        // R,G,B is packed as 0x00BBGGRR
77        s0 = scratches.obtain();
78        if (s.reg != addr.reg) {
79            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
80            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
81            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
82            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
83            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
84        } else {
85            int s1 = scratches.obtain();
86            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
87            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
88            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
89            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
90            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
91        }
92        if (inc)
93            ADD(AL, 0, addr.reg, addr.reg, imm(3));
94        break;
95    case 16:
96        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
97        else        LDRH(AL, s.reg, addr.reg);
98        break;
99    case  8:
100        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
101        else        LDRB(AL, s.reg, addr.reg);
102        break;
103    }
104}
105
106void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
107{
108    const int maskLen = h-l;
109
110    assert(maskLen<=8);
111    assert(h);
112
113    if (h != bits) {
114        const int mask = ((1<<maskLen)-1) << l;
115        if (isValidImmediate(mask)) {
116            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
117        } else if (isValidImmediate(~mask)) {
118            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
119        } else {
120            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
121            l += 32-h;
122            h = 32;
123        }
124        s = d.reg;
125    }
126
127    if (l) {
128        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
129        s = d.reg;
130    }
131
132    if (s != d.reg) {
133        MOV(AL, 0, d.reg, s);
134    }
135
136    d.s = maskLen;
137}
138
139void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
140{
141    extract(d,  s.reg,
142                s.format.c[component].h,
143                s.format.c[component].l,
144                s.size());
145}
146
147void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
148{
149    integer_t r(d.reg, 32, d.flags);
150    extract(r,  s.reg,
151                s.format.c[component].h,
152                s.format.c[component].l,
153                s.size());
154    d = component_t(r);
155}
156
157
158void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
159{
160    if (s.l || (s.flags & CLEAR_HI)) {
161        extract(d, s.reg, s.h, s.l, 32);
162        expand(d, d, dbits);
163    } else {
164        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
165    }
166}
167
168void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
169{
170    integer_t r(d.reg, 32, d.flags);
171    expand(r, s, dbits);
172    d = component_t(r);
173}
174
175void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
176{
177    assert(src.size());
178
179    int sbits = src.size();
180    int s = src.reg;
181    int d = dst.reg;
182
183    // be sure to set 'dst' after we read 'src' as they may be identical
184    dst.s = dbits;
185    dst.flags = 0;
186
187    if (dbits<=sbits) {
188        if (s != d) {
189            MOV(AL, 0, d, s);
190        }
191        return;
192    }
193
194    if (sbits == 1) {
195        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
196            // d = (s<<dbits) - s;
197        return;
198    }
199
200    if (dbits % sbits) {
201        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
202            // d = s << (dbits-sbits);
203        dbits -= sbits;
204        do {
205            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
206                // d |= d >> sbits;
207            dbits -= sbits;
208            sbits *= 2;
209        } while(dbits>0);
210        return;
211    }
212
213    dbits -= sbits;
214    do {
215        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
216            // d |= d<<sbits;
217        s = d;
218        dbits -= sbits;
219        if (sbits*2 < dbits) {
220            sbits *= 2;
221        }
222    } while(dbits>0);
223}
224
225void GGLAssembler::downshift(
226        pixel_t& d, int component, component_t s, const reg_t& dither)
227{
228    const needs_t& needs = mBuilderContext.needs;
229    Scratch scratches(registerFile());
230
231    int sh = s.h;
232    int sl = s.l;
233    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
234    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
235    int sbits = sh - sl;
236
237    int dh = d.format.c[component].h;
238    int dl = d.format.c[component].l;
239    int dbits = dh - dl;
240    int dithering = 0;
241
242    LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
243
244    if (sbits>dbits) {
245        // see if we need to dither
246        dithering = mDithering;
247    }
248
249    int ireg = d.reg;
250    if (!(d.flags & FIRST)) {
251        if (s.flags & CORRUPTIBLE)  {
252            ireg = s.reg;
253        } else {
254            ireg = scratches.obtain();
255        }
256    }
257    d.flags &= ~FIRST;
258
259    if (maskHiBits) {
260        // we need to mask the high bits (and possibly the lowbits too)
261        // and we might be able to use immediate mask.
262        if (!dithering) {
263            // we don't do this if we only have maskLoBits because we can
264            // do it more efficiently below (in the case where dl=0)
265            const int offset = sh - dbits;
266            if (dbits<=8 && offset >= 0) {
267                const uint32_t mask = ((1<<dbits)-1) << offset;
268                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
269                    build_and_immediate(ireg, s.reg, mask, 32);
270                    sl = offset;
271                    s.reg = ireg;
272                    sbits = dbits;
273                    maskLoBits = maskHiBits = 0;
274                }
275            }
276        } else {
277            // in the dithering case though, we need to preserve the lower bits
278            const uint32_t mask = ((1<<sbits)-1) << sl;
279            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
280                build_and_immediate(ireg, s.reg, mask, 32);
281                s.reg = ireg;
282                maskLoBits = maskHiBits = 0;
283            }
284        }
285    }
286
287    // XXX: we could special case (maskHiBits & !maskLoBits)
288    // like we do for maskLoBits below, but it happens very rarely
289    // that we have maskHiBits only and the conditions necessary to lead
290    // to better code (like doing d |= s << 24)
291
292    if (maskHiBits) {
293        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
294        sl += 32-sh;
295        sh = 32;
296        s.reg = ireg;
297        maskHiBits = 0;
298    }
299
300    //	Downsampling should be performed as follows:
301    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
302    //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
303    //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
304    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
305    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
306    //
307    //	By approximating (1>>dbits) and (1>>sbits) to 0:
308    //
309    //		V>>(sbits-dbits)	-	V>>sbits
310    //
311	//  A good approximation is V>>(sbits-dbits),
312    //  but better one (needed for dithering) is:
313    //
314    //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
315    //		(V<<dbits	-	V)>>sbits
316    //		(V	-	V>>dbits)>>(sbits-dbits)
317
318    // Dithering is done here
319    if (dithering) {
320        comment("dithering");
321        if (sl) {
322            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
323            sh -= sl;
324            sl = 0;
325            s.reg = ireg;
326        }
327        // scaling (V-V>>dbits)
328        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
329        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
330        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
331        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
332        else                ADD(AL, 0, ireg, ireg, dither.reg);
333        s.reg = ireg;
334    }
335
336    if ((maskLoBits|dithering) && (sh > dbits)) {
337        int shift = sh-dbits;
338        if (dl) {
339            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
340            if (ireg == d.reg) {
341                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
342            } else {
343                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
344            }
345        } else {
346            if (ireg == d.reg) {
347                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
348            } else {
349                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
350            }
351        }
352    } else {
353        int shift = sh-dh;
354        if (shift>0) {
355            if (ireg == d.reg) {
356                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
357            } else {
358                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
359            }
360        } else if (shift<0) {
361            if (ireg == d.reg) {
362                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
363            } else {
364                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
365            }
366        } else {
367            if (ireg == d.reg) {
368                if (s.reg != d.reg) {
369                    MOV(AL, 0, d.reg, s.reg);
370                }
371            } else {
372                ORR(AL, 0, d.reg, d.reg, s.reg);
373            }
374        }
375    }
376}
377
378}; // namespace android
379