// load_store.cpp revision 4dc1fa8e8d1f14868ab8bba93a8cbb87f847c4e3
1/* libs/pixelflinger/codeflinger/load_store.cpp 2** 3** Copyright 2006, The Android Open Source Project 4** 5** Licensed under the Apache License, Version 2.0 (the "License"); 6** you may not use this file except in compliance with the License. 7** You may obtain a copy of the License at 8** 9** http://www.apache.org/licenses/LICENSE-2.0 10** 11** Unless required by applicable law or agreed to in writing, software 12** distributed under the License is distributed on an "AS IS" BASIS, 13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14** See the License for the specific language governing permissions and 15** limitations under the License. 16*/ 17 18#include <assert.h> 19#include <stdio.h> 20#include <cutils/log.h> 21#include "codeflinger/GGLAssembler.h" 22 23#include <machine/cpu-features.h> 24 25namespace android { 26 27// ---------------------------------------------------------------------------- 28 29void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags) 30{ 31 const int bits = addr.size; 32 const int inc = (flags & WRITE_BACK)?1:0; 33 switch (bits) { 34 case 32: 35 if (inc) STR(AL, s.reg, addr.reg, immed12_post(4)); 36 else STR(AL, s.reg, addr.reg); 37 break; 38 case 24: 39 // 24 bits formats are a little special and used only for RGB 40 // 0x00BBGGRR is unpacked as R,G,B 41 STRB(AL, s.reg, addr.reg, immed12_pre(0)); 42 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8)); 43 STRB(AL, s.reg, addr.reg, immed12_pre(1)); 44 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8)); 45 STRB(AL, s.reg, addr.reg, immed12_pre(2)); 46 if (!(s.flags & CORRUPTIBLE)) { 47 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16)); 48 } 49 if (inc) 50 ADD(AL, 0, addr.reg, addr.reg, imm(3)); 51 break; 52 case 16: 53 if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2)); 54 else STRH(AL, s.reg, addr.reg); 55 break; 56 case 8: 57 if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1)); 58 else STRB(AL, s.reg, addr.reg); 59 break; 60 } 61} 62 63void 
GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags) 64{ 65 Scratch scratches(registerFile()); 66 int s0; 67 68 const int bits = addr.size; 69 const int inc = (flags & WRITE_BACK)?1:0; 70 switch (bits) { 71 case 32: 72 if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4)); 73 else LDR(AL, s.reg, addr.reg); 74 break; 75 case 24: 76 // 24 bits formats are a little special and used only for RGB 77 // R,G,B is packed as 0x00BBGGRR 78 s0 = scratches.obtain(); 79 if (s.reg != addr.reg) { 80 LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R 81 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G 82 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8)); 83 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B 84 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16)); 85 } else { 86 int s1 = scratches.obtain(); 87 LDRB(AL, s1, addr.reg, immed12_pre(0)); // R 88 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G 89 ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8)); 90 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B 91 ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16)); 92 } 93 if (inc) 94 ADD(AL, 0, addr.reg, addr.reg, imm(3)); 95 break; 96 case 16: 97 if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2)); 98 else LDRH(AL, s.reg, addr.reg); 99 break; 100 case 8: 101 if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1)); 102 else LDRB(AL, s.reg, addr.reg); 103 break; 104 } 105} 106 107void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits) 108{ 109 const int maskLen = h-l; 110 111 assert(maskLen<=8); 112 assert(h); 113 114#if __ARM_ARCH__ >= 7 115 const int mask = (1<<maskLen)-1; 116 if ((h == bits) && !l && (s != d.reg)) { 117 MOV(AL, 0, d.reg, s); // component = packed; 118 } else if ((h == bits) && l) { 119 MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l; 120 } else if (!l && isValidImmediate(mask)) { 121 AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask; 122 } else if (!l && isValidImmediate(~mask)) { 123 BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask; 124 
} else { 125 UBFX(AL, d.reg, s, l, maskLen); // component = (packed & mask) >> l; 126 } 127#else 128 if (h != bits) { 129 const int mask = ((1<<maskLen)-1) << l; 130 if (isValidImmediate(mask)) { 131 AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask; 132 } else if (isValidImmediate(~mask)) { 133 BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask; 134 } else { 135 MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h)); 136 l += 32-h; 137 h = 32; 138 } 139 s = d.reg; 140 } 141 142 if (l) { 143 MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l; 144 s = d.reg; 145 } 146 147 if (s != d.reg) { 148 MOV(AL, 0, d.reg, s); 149 } 150#endif 151 152 d.s = maskLen; 153} 154 155void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component) 156{ 157 extract(d, s.reg, 158 s.format.c[component].h, 159 s.format.c[component].l, 160 s.size()); 161} 162 163void GGLAssembler::extract(component_t& d, const pixel_t& s, int component) 164{ 165 integer_t r(d.reg, 32, d.flags); 166 extract(r, s.reg, 167 s.format.c[component].h, 168 s.format.c[component].l, 169 s.size()); 170 d = component_t(r); 171} 172 173 174void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits) 175{ 176 if (s.l || (s.flags & CLEAR_HI)) { 177 extract(d, s.reg, s.h, s.l, 32); 178 expand(d, d, dbits); 179 } else { 180 expand(d, integer_t(s.reg, s.size(), s.flags), dbits); 181 } 182} 183 184void GGLAssembler::expand(component_t& d, const component_t& s, int dbits) 185{ 186 integer_t r(d.reg, 32, d.flags); 187 expand(r, s, dbits); 188 d = component_t(r); 189} 190 191void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits) 192{ 193 assert(src.size()); 194 195 int sbits = src.size(); 196 int s = src.reg; 197 int d = dst.reg; 198 199 // be sure to set 'dst' after we read 'src' as they may be identical 200 dst.s = dbits; 201 dst.flags = 0; 202 203 if (dbits<=sbits) { 204 if (s != d) { 205 MOV(AL, 0, d, s); 206 } 207 return; 208 } 209 210 if (sbits == 1) { 211 
RSB(AL, 0, d, s, reg_imm(s, LSL, dbits)); 212 // d = (s<<dbits) - s; 213 return; 214 } 215 216 if (dbits % sbits) { 217 MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits)); 218 // d = s << (dbits-sbits); 219 dbits -= sbits; 220 do { 221 ORR(AL, 0, d, d, reg_imm(d, LSR, sbits)); 222 // d |= d >> sbits; 223 dbits -= sbits; 224 sbits *= 2; 225 } while(dbits>0); 226 return; 227 } 228 229 dbits -= sbits; 230 do { 231 ORR(AL, 0, d, s, reg_imm(s, LSL, sbits)); 232 // d |= d<<sbits; 233 s = d; 234 dbits -= sbits; 235 if (sbits*2 < dbits) { 236 sbits *= 2; 237 } 238 } while(dbits>0); 239} 240 241void GGLAssembler::downshift( 242 pixel_t& d, int component, component_t s, const reg_t& dither) 243{ 244 const needs_t& needs = mBuilderContext.needs; 245 Scratch scratches(registerFile()); 246 247 int sh = s.h; 248 int sl = s.l; 249 int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0; 250 int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0; 251 int sbits = sh - sl; 252 253 int dh = d.format.c[component].h; 254 int dl = d.format.c[component].l; 255 int dbits = dh - dl; 256 int dithering = 0; 257 258 LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits); 259 260 if (sbits>dbits) { 261 // see if we need to dither 262 dithering = mDithering; 263 } 264 265 int ireg = d.reg; 266 if (!(d.flags & FIRST)) { 267 if (s.flags & CORRUPTIBLE) { 268 ireg = s.reg; 269 } else { 270 ireg = scratches.obtain(); 271 } 272 } 273 d.flags &= ~FIRST; 274 275 if (maskHiBits) { 276 // we need to mask the high bits (and possibly the lowbits too) 277 // and we might be able to use immediate mask. 
278 if (!dithering) { 279 // we don't do this if we only have maskLoBits because we can 280 // do it more efficiently below (in the case where dl=0) 281 const int offset = sh - dbits; 282 if (dbits<=8 && offset >= 0) { 283 const uint32_t mask = ((1<<dbits)-1) << offset; 284 if (isValidImmediate(mask) || isValidImmediate(~mask)) { 285 build_and_immediate(ireg, s.reg, mask, 32); 286 sl = offset; 287 s.reg = ireg; 288 sbits = dbits; 289 maskLoBits = maskHiBits = 0; 290 } 291 } 292 } else { 293 // in the dithering case though, we need to preserve the lower bits 294 const uint32_t mask = ((1<<sbits)-1) << sl; 295 if (isValidImmediate(mask) || isValidImmediate(~mask)) { 296 build_and_immediate(ireg, s.reg, mask, 32); 297 s.reg = ireg; 298 maskLoBits = maskHiBits = 0; 299 } 300 } 301 } 302 303 // XXX: we could special case (maskHiBits & !maskLoBits) 304 // like we do for maskLoBits below, but it happens very rarely 305 // that we have maskHiBits only and the conditions necessary to lead 306 // to better code (like doing d |= s << 24) 307 308 if (maskHiBits) { 309 MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh)); 310 sl += 32-sh; 311 sh = 32; 312 s.reg = ireg; 313 maskHiBits = 0; 314 } 315 316 // Downsampling should be performed as follows: 317 // V * ((1<<dbits)-1) / ((1<<sbits)-1) 318 // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)] 319 // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)] 320 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits 321 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits)) 322 // 323 // By approximating (1>>dbits) and (1>>sbits) to 0: 324 // 325 // V>>(sbits-dbits) - V>>sbits 326 // 327 // A good approximation is V>>(sbits-dbits), 328 // but better one (needed for dithering) is: 329 // 330 // (V>>(sbits-dbits)<<sbits - V)>>sbits 331 // (V<<dbits - V)>>sbits 332 // (V - V>>dbits)>>(sbits-dbits) 333 334 // Dithering is done here 335 if (dithering) { 336 comment("dithering"); 337 if (sl) { 338 MOV(AL, 0, ireg, 
reg_imm(s.reg, LSR, sl)); 339 sh -= sl; 340 sl = 0; 341 s.reg = ireg; 342 } 343 // scaling (V-V>>dbits) 344 SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits)); 345 const int shift = (GGL_DITHER_BITS - (sbits-dbits)); 346 if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift)); 347 else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift)); 348 else ADD(AL, 0, ireg, ireg, dither.reg); 349 s.reg = ireg; 350 } 351 352 if ((maskLoBits|dithering) && (sh > dbits)) { 353 int shift = sh-dbits; 354 if (dl) { 355 MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift)); 356 if (ireg == d.reg) { 357 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl)); 358 } else { 359 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl)); 360 } 361 } else { 362 if (ireg == d.reg) { 363 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); 364 } else { 365 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); 366 } 367 } 368 } else { 369 int shift = sh-dh; 370 if (shift>0) { 371 if (ireg == d.reg) { 372 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); 373 } else { 374 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); 375 } 376 } else if (shift<0) { 377 if (ireg == d.reg) { 378 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift)); 379 } else { 380 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift)); 381 } 382 } else { 383 if (ireg == d.reg) { 384 if (s.reg != d.reg) { 385 MOV(AL, 0, d.reg, s.reg); 386 } 387 } else { 388 ORR(AL, 0, d.reg, d.reg, s.reg); 389 } 390 } 391 } 392} 393 394}; // namespace android 395