1/* libs/pixelflinger/codeflinger/load_store.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <assert.h>
19#include <stdio.h>
20#include <cutils/log.h>
21#include "GGLAssembler.h"
22
23namespace android {
24
25// ----------------------------------------------------------------------------
26
27void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
28{
29    const int bits = addr.size;
30    const int inc = (flags & WRITE_BACK)?1:0;
31    switch (bits) {
32    case 32:
33        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
34        else        STR(AL, s.reg, addr.reg);
35        break;
36    case 24:
37        // 24 bits formats are a little special and used only for RGB
38        // 0x00BBGGRR is unpacked as R,G,B
39        STRB(AL, s.reg, addr.reg, immed12_pre(0));
40        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
41        STRB(AL, s.reg, addr.reg, immed12_pre(1));
42        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
43        STRB(AL, s.reg, addr.reg, immed12_pre(2));
44        if (!(s.flags & CORRUPTIBLE)) {
45            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
46        }
47        if (inc)
48            ADD(AL, 0, addr.reg, addr.reg, imm(3));
49        break;
50    case 16:
51        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
52        else        STRH(AL, s.reg, addr.reg);
53        break;
54    case  8:
55        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
56        else        STRB(AL, s.reg, addr.reg);
57        break;
58    }
59}
60
61void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
62{
63    Scratch scratches(registerFile());
64    int s0;
65
66    const int bits = addr.size;
67    const int inc = (flags & WRITE_BACK)?1:0;
68    switch (bits) {
69    case 32:
70        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
71        else        LDR(AL, s.reg, addr.reg);
72        break;
73    case 24:
74        // 24 bits formats are a little special and used only for RGB
75        // R,G,B is packed as 0x00BBGGRR
76        s0 = scratches.obtain();
77        if (s.reg != addr.reg) {
78            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
79            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
80            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
81            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
82            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
83        } else {
84            int s1 = scratches.obtain();
85            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
86            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
87            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
88            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
89            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
90        }
91        if (inc)
92            ADD(AL, 0, addr.reg, addr.reg, imm(3));
93        break;
94    case 16:
95        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
96        else        LDRH(AL, s.reg, addr.reg);
97        break;
98    case  8:
99        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
100        else        LDRB(AL, s.reg, addr.reg);
101        break;
102    }
103}
104
// Emit code that extracts the bit-field [l, h) of register 's' into 'd',
// right-aligned; i.e. d = (s >> l) & ((1<<(h-l))-1).
//   d    : destination; d.s is set to the field width on return
//   s    : register holding the packed source value
//   h    : position one past the field's MSB
//   l    : position of the field's LSB
//   bits : number of significant bits in 's' (a field that already
//          reaches the top needs no masking, only shifting)
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    // MIPS can encode wider mask immediates than ARM's 8-bit rotated form
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        // bits above the field must be cleared: with AND/BIC when the
        // mask (or its complement) fits an immediate encoding, otherwise
        // by shifting the field up against bit 31
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // left-justify the field and update h/l to its new position
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        // right-align the field
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // field was already right-aligned in 's'; just copy it over
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}
141
142void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
143{
144    extract(d,  s.reg,
145                s.format.c[component].h,
146                s.format.c[component].l,
147                s.size());
148}
149
150void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
151{
152    integer_t r(d.reg, 32, d.flags);
153    extract(r,  s.reg,
154                s.format.c[component].h,
155                s.format.c[component].l,
156                s.size());
157    d = component_t(r);
158}
159
160
161void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
162{
163    if (s.l || (s.flags & CLEAR_HI)) {
164        extract(d, s.reg, s.h, s.l, 32);
165        expand(d, d, dbits);
166    } else {
167        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
168    }
169}
170
171void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
172{
173    integer_t r(d.reg, 32, d.flags);
174    expand(r, s, dbits);
175    d = component_t(r);
176}
177
// Emit code that expands the right-aligned src.size()-bit value in 'src'
// to 'dbits' bits in 'dst' by replicating the source bits downward, so
// the full output range is covered (e.g. 5-bit 0x1F expands to 0xFF).
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // no expansion needed; at most a register copy
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // single-bit source: 0 stays 0, 1 becomes dbits ones
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // destination width is not a multiple of the source width:
        // left-justify, then OR in right-shifted copies, doubling the
        // replicated run each iteration
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // destination width is an exact multiple of the source width:
    // build up by ORing left-shifted copies of the source
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
227
228void GGLAssembler::downshift(
229        pixel_t& d, int component, component_t s, const reg_t& dither)
230{
231    const needs_t& needs = mBuilderContext.needs;
232    Scratch scratches(registerFile());
233
234    int sh = s.h;
235    int sl = s.l;
236    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
237    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
238    int sbits = sh - sl;
239
240    int dh = d.format.c[component].h;
241    int dl = d.format.c[component].l;
242    int dbits = dh - dl;
243    int dithering = 0;
244
245    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
246
247    if (sbits>dbits) {
248        // see if we need to dither
249        dithering = mDithering;
250    }
251
252    int ireg = d.reg;
253    if (!(d.flags & FIRST)) {
254        if (s.flags & CORRUPTIBLE)  {
255            ireg = s.reg;
256        } else {
257            ireg = scratches.obtain();
258        }
259    }
260    d.flags &= ~FIRST;
261
262    if (maskHiBits) {
263        // we need to mask the high bits (and possibly the lowbits too)
264        // and we might be able to use immediate mask.
265        if (!dithering) {
266            // we don't do this if we only have maskLoBits because we can
267            // do it more efficiently below (in the case where dl=0)
268            const int offset = sh - dbits;
269            if (dbits<=8 && offset >= 0) {
270                const uint32_t mask = ((1<<dbits)-1) << offset;
271                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
272                    build_and_immediate(ireg, s.reg, mask, 32);
273                    sl = offset;
274                    s.reg = ireg;
275                    sbits = dbits;
276                    maskLoBits = maskHiBits = 0;
277                }
278            }
279        } else {
280            // in the dithering case though, we need to preserve the lower bits
281            const uint32_t mask = ((1<<sbits)-1) << sl;
282            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
283                build_and_immediate(ireg, s.reg, mask, 32);
284                s.reg = ireg;
285                maskLoBits = maskHiBits = 0;
286            }
287        }
288    }
289
290    // XXX: we could special case (maskHiBits & !maskLoBits)
291    // like we do for maskLoBits below, but it happens very rarely
292    // that we have maskHiBits only and the conditions necessary to lead
293    // to better code (like doing d |= s << 24)
294
295    if (maskHiBits) {
296        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
297        sl += 32-sh;
298        sh = 32;
299        s.reg = ireg;
300        maskHiBits = 0;
301    }
302
303    //	Downsampling should be performed as follows:
304    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
305    //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
306    //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
307    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
308    //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
309    //
310    //	By approximating (1>>dbits) and (1>>sbits) to 0:
311    //
312    //		V>>(sbits-dbits)	-	V>>sbits
313    //
314	//  A good approximation is V>>(sbits-dbits),
315    //  but better one (needed for dithering) is:
316    //
317    //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
318    //		(V<<dbits	-	V)>>sbits
319    //		(V	-	V>>dbits)>>(sbits-dbits)
320
321    // Dithering is done here
322    if (dithering) {
323        comment("dithering");
324        if (sl) {
325            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
326            sh -= sl;
327            sl = 0;
328            s.reg = ireg;
329        }
330        // scaling (V-V>>dbits)
331        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
332        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
333        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
334        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
335        else                ADD(AL, 0, ireg, ireg, dither.reg);
336        s.reg = ireg;
337    }
338
339    if ((maskLoBits|dithering) && (sh > dbits)) {
340        int shift = sh-dbits;
341        if (dl) {
342            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
343            if (ireg == d.reg) {
344                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
345            } else {
346                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
347            }
348        } else {
349            if (ireg == d.reg) {
350                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
351            } else {
352                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
353            }
354        }
355    } else {
356        int shift = sh-dh;
357        if (shift>0) {
358            if (ireg == d.reg) {
359                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
360            } else {
361                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
362            }
363        } else if (shift<0) {
364            if (ireg == d.reg) {
365                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
366            } else {
367                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
368            }
369        } else {
370            if (ireg == d.reg) {
371                if (s.reg != d.reg) {
372                    MOV(AL, 0, d.reg, s.reg);
373                }
374            } else {
375                ORR(AL, 0, d.reg, d.reg, s.reg);
376            }
377        }
378    }
379}
380
381}; // namespace android
382