/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
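
// extract() isolates the bit field [h-1..l] of the packed pixel held in
// register 's' and leaves it right-justified in d.reg; 'bits' is the total
// size of the packed pixel, so the masking step can be skipped when the
// field extends to the top of the pixel. The field width (h-l), at most
// 8 bits, is recorded in d.s. The overloads below extract a given color
// component of a pixel_t using its format description.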
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
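
// downshift() converts component 's' (bits [s.h-1..s.l] of s.reg) to the bit
// depth of 'component' in the destination format, adding the supplied dither
// value when precision is reduced and dithering is enabled, and merges the
// result into the destination pixel register d.reg at the component's offset.
// The FIRST flag on 'd' indicates that d.reg holds no other components yet
// and may simply be overwritten.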
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too)
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    //   V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //   V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
    //   V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //   V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    //   (V>>(sbits-dbits)<<sbits - V)>>sbits
    //   (V<<dbits - V)>>sbits
    //   (V - V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android