1/* 2 * Copyright 2014 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include <arm_neon.h> 9 10#define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale) 11#define SCALE_FILTER_NAME MAKENAME(_filter_scale) 12 13#define PACK_FILTER_X_NAME MAKENAME(_pack_filter_x) 14#define PACK_FILTER_Y_NAME MAKENAME(_pack_filter_y) 15#define PACK_FILTER_X4_NAME MAKENAME(_pack_filter_x4) 16#define PACK_FILTER_Y4_NAME MAKENAME(_pack_filter_y4) 17 18#ifndef PREAMBLE 19 #define PREAMBLE(state) 20 #define PREAMBLE_PARAM_X 21 #define PREAMBLE_PARAM_Y 22 #define PREAMBLE_ARG_X 23 #define PREAMBLE_ARG_Y 24#endif 25 26static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, 27 uint32_t xy[], int count, int x, int y) { 28 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | 29 SkMatrix::kScale_Mask)) == 0); 30 31 PREAMBLE(s); 32 33 // we store y, x, x, x, x, x 34 const unsigned maxX = s.fPixmap.width() - 1; 35 SkFractionalInt fx; 36 { 37 const SkBitmapProcStateAutoMapper mapper(s, x, y); 38 const unsigned maxY = s.fPixmap.height() - 1; 39 *xy++ = TILEY_PROCF(mapper.fixedY(), maxY); 40 fx = mapper.fractionalIntX(); 41 } 42 43 if (0 == maxX) { 44 // all of the following X values must be 0 45 memset(xy, 0, count * sizeof(uint16_t)); 46 return; 47 } 48 49 const SkFractionalInt dx = s.fInvSxFractionalInt; 50 51#ifdef CHECK_FOR_DECAL 52 // test if we don't need to apply the tile proc 53 const SkFixed fixedFx = SkFractionalIntToFixed(fx); 54 const SkFixed fixedDx = SkFractionalIntToFixed(dx); 55 if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) { 56 decal_nofilter_scale_neon(xy, fixedFx, fixedDx, count); 57 return; 58 } 59#endif 60 61 if (count >= 8) { 62 SkFractionalInt dx2 = dx+dx; 63 SkFractionalInt dx4 = dx2+dx2; 64 SkFractionalInt dx8 = dx4+dx4; 65 66 // now build fx/fx+dx/fx+2dx/fx+3dx 67 SkFractionalInt fx1, fx2, fx3; 68 int32x4_t lbase, hbase; 69 int16_t *dst16 = (int16_t *)xy; 70 71 fx1 = fx+dx; 72 fx2 = fx1+dx; 73 fx3 = fx2+dx; 74 75 lbase = vdupq_n_s32(SkFractionalIntToFixed(fx)); 76 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1); 77 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2); 78 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3); 79 hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4))); 80 81 // store & bump 82 while (count >= 8) { 83 84 int16x8_t fx8; 85 86 fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX); 87 88 vst1q_s16(dst16, fx8); 89 90 // but preserving base & on to the next 91 lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8))); 92 hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8))); 93 dst16 += 8; 94 count -= 8; 95 fx += dx8; 96 }; 97 xy = (uint32_t *) dst16; 98 } 99 100 uint16_t* xx = (uint16_t*)xy; 101 for (int i = count; i > 0; --i) { 102 *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX); 103 fx += dx; 104 } 105} 106 107static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max, 108 SkFixed one PREAMBLE_PARAM_Y) { 109 unsigned i = TILEY_PROCF(f, max); 110 i = (i << 4) | EXTRACT_LOW_BITS(f, max); 111 return (i << 14) | (TILEY_PROCF((f + one), max)); 112} 113 114static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max, 115 SkFixed one PREAMBLE_PARAM_X) { 116 unsigned i = TILEX_PROCF(f, max); 117 i = (i << 4) | EXTRACT_LOW_BITS(f, max); 118 return (i << 14) | (TILEX_PROCF((f + one), max)); 119} 120 121static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max, 122 SkFixed one PREAMBLE_PARAM_X) { 123 int32x4_t ret, res, wide_one; 124 125 // Prepare constants 126 wide_one = vdupq_n_s32(one); 127 128 // Step 1 129 res = TILEX_PROCF_NEON4(f, max); 130 131 // Step 2 132 ret = EXTRACT_LOW_BITS_NEON4(f, max); 133 ret = vsliq_n_s32(ret, res, 4); 134 135 // Step 3 136 res = TILEX_PROCF_NEON4(f + wide_one, max); 137 ret = vorrq_s32(vshlq_n_s32(ret, 14), res); 138 139 return ret; 140} 141 142static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max, 143 SkFixed one PREAMBLE_PARAM_X) { 144 int32x4_t ret, res, wide_one; 145 146 // Prepare constants 147 wide_one = vdupq_n_s32(one); 148 149 // Step 1 150 res = TILEY_PROCF_NEON4(f, max); 151 152 // Step 2 153 ret = EXTRACT_LOW_BITS_NEON4(f, max); 154 ret = vsliq_n_s32(ret, res, 4); 155 156 // Step 3 157 res = TILEY_PROCF_NEON4(f + wide_one, max); 158 ret = vorrq_s32(vshlq_n_s32(ret, 14), res); 159 160 return ret; 161} 162 163static void SCALE_FILTER_NAME(const SkBitmapProcState& s, 164 uint32_t xy[], int count, int x, int y) { 165 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | 166 SkMatrix::kScale_Mask)) == 0); 167 SkASSERT(s.fInvKy == 0); 168 169 PREAMBLE(s); 170 171 const unsigned maxX = s.fPixmap.width() - 1; 172 const SkFixed one = s.fFilterOneX; 173 const SkFractionalInt dx = s.fInvSxFractionalInt; 174 SkFractionalInt fx; 175 176 { 177 const SkBitmapProcStateAutoMapper mapper(s, x, y); 178 const SkFixed fy = mapper.fixedY(); 179 const unsigned maxY = s.fPixmap.height() - 1; 180 // compute our two Y values up front 181 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y); 182 // now initialize fx 183 fx = mapper.fractionalIntX(); 184 } 185 186#ifdef CHECK_FOR_DECAL 187 // test if we don't need to apply the tile proc 188 const SkFixed fixedFx = SkFractionalIntToFixed(fx); 189 const SkFixed fixedDx = SkFractionalIntToFixed(dx); 190 if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) { 191 decal_filter_scale_neon(xy, fixedFx, fixedDx, count); 192 return; 193 } 194#endif 195 { 196 197 if (count >= 4) { 198 int32x4_t wide_fx; 199 200 wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx)); 201 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1); 202 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2); 203 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3); 204 205 while (count >= 4) { 206 int32x4_t res; 207 208 res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X); 209 210 vst1q_u32(xy, vreinterpretq_u32_s32(res)); 211 212 wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx)); 213 fx += dx+dx+dx+dx; 214 xy += 4; 215 count -= 4; 216 } 217 } 218 219 while (--count >= 0) { 220 *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X); 221 fx += dx; 222 } 223 224 } 225} 226 227const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = { 228 SCALE_NOFILTER_NAME, 229 SCALE_FILTER_NAME, 230}; 231 232#undef TILEX_PROCF_NEON8 233#undef TILEY_PROCF_NEON8 234#undef TILEX_PROCF_NEON4 235#undef TILEY_PROCF_NEON4 236#undef EXTRACT_LOW_BITS_NEON4 237 238#undef MAKENAME 239#undef TILEX_PROCF 240#undef TILEY_PROCF 241#ifdef CHECK_FOR_DECAL 242 #undef CHECK_FOR_DECAL 243#endif 244 245#undef SCALE_NOFILTER_NAME 246#undef SCALE_FILTER_NAME 247 248#undef PREAMBLE 249#undef PREAMBLE_PARAM_X 250#undef PREAMBLE_PARAM_Y 251#undef PREAMBLE_ARG_X 252#undef PREAMBLE_ARG_Y 253 254#undef EXTRACT_LOW_BITS 255