1/* 2 * Copyright 2009 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkBitmapFilter_opts_SSE2.h" 9#include "SkBitmapProcState_opts_SSE2.h" 10#include "SkBitmapProcState_opts_SSSE3.h" 11#include "SkBlitMask.h" 12#include "SkBlitRect_opts_SSE2.h" 13#include "SkBlitRow.h" 14#include "SkBlitRow_opts_SSE2.h" 15#include "SkBlurImage_opts_SSE2.h" 16#include "SkMorphology_opts.h" 17#include "SkMorphology_opts_SSE2.h" 18#include "SkRTConf.h" 19#include "SkUtils.h" 20#include "SkUtils_opts_SSE2.h" 21#include "SkXfermode.h" 22#include "SkXfermode_proccoeff.h" 23 24#if defined(_MSC_VER) && defined(_WIN64) 25#include <intrin.h> 26#endif 27 28/* This file must *not* be compiled with -msse or any other optional SIMD 29 extension, otherwise gcc may generate SIMD instructions even for scalar ops 30 (and thus give an invalid instruction on Pentium3 on the code below). 31 For example, only files named *_SSE2.cpp in this directory should be 32 compiled with -msse2 or higher. */ 33 34 35/* Function to get the CPU SSE-level in runtime, for different compilers. */ 36#ifdef _MSC_VER 37static inline void getcpuid(int info_type, int info[4]) { 38#if defined(_WIN64) 39 __cpuid(info, info_type); 40#else 41 __asm { 42 mov eax, [info_type] 43 cpuid 44 mov edi, [info] 45 mov [edi], eax 46 mov [edi+4], ebx 47 mov [edi+8], ecx 48 mov [edi+12], edx 49 } 50#endif 51} 52#elif defined(__x86_64__) 53static inline void getcpuid(int info_type, int info[4]) { 54 asm volatile ( 55 "cpuid \n\t" 56 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) 57 : "a"(info_type) 58 ); 59} 60#else 61static inline void getcpuid(int info_type, int info[4]) { 62 // We save and restore ebx, so this code can be compatible with -fPIC 63 asm volatile ( 64 "pushl %%ebx \n\t" 65 "cpuid \n\t" 66 "movl %%ebx, %1 \n\t" 67 "popl %%ebx \n\t" 68 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) 69 : "a"(info_type) 70 ); 71} 72#endif 73 74//////////////////////////////////////////////////////////////////////////////// 75 76/* Fetch the SIMD level directly from the CPU, at run-time. 77 * Only checks the levels needed by the optimizations in this file. 78 */ 79static int get_SIMD_level() { 80 int cpu_info[4] = { 0 }; 81 82 getcpuid(1, cpu_info); 83 if ((cpu_info[2] & (1<<20)) != 0) { 84 return SK_CPU_SSE_LEVEL_SSE42; 85 } else if ((cpu_info[2] & (1<<9)) != 0) { 86 return SK_CPU_SSE_LEVEL_SSSE3; 87 } else if ((cpu_info[3] & (1<<26)) != 0) { 88 return SK_CPU_SSE_LEVEL_SSE2; 89 } else { 90 return 0; 91 } 92} 93 94/* Verify that the requested SIMD level is supported in the build. 95 * If not, check if the platform supports it. 96 */ 97static inline bool supports_simd(int minLevel) { 98#if defined(SK_CPU_SSE_LEVEL) 99 if (minLevel <= SK_CPU_SSE_LEVEL) { 100 return true; 101 } else 102#endif 103 { 104#if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) 105 /* For the Android framework we should always know at compile time if the device 106 * we are building for supports SSSE3. The one exception to this rule is on the 107 * emulator where we are compiled without the -mssse3 option (so we have no 108 * SSSE3 procs) but can be run on a host machine that supports SSSE3 109 * instructions. So for that particular case we disable our SSSE3 options. 110 */ 111 return false; 112#else 113 static int gSIMDLevel = get_SIMD_level(); 114 return (minLevel <= gSIMDLevel); 115#endif 116 } 117} 118 119//////////////////////////////////////////////////////////////////////////////// 120 121SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters"); 122 123void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { 124 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 125 procs->fExtraHorizontalReads = 3; 126 procs->fConvolveVertically = &convolveVertically_SSE2; 127 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; 128 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; 129 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; 130 } 131} 132 133//////////////////////////////////////////////////////////////////////////////// 134 135void SkBitmapProcState::platformProcs() { 136 /* Every optimization in the function requires at least SSE2 */ 137 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 138 return; 139 } 140 141 /* Check fSampleProc32 */ 142 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 143 if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) { 144 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; 145 } else { 146 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; 147 } 148 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { 149 if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) { 150 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; 151 } 152 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 153 if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) { 154 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; 155 } else { 156 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; 157 } 158 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { 159 if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) { 160 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; 161 } 162 } 163 164 /* Check fSampleProc16 */ 165 if (fSampleProc16 == S32_D16_filter_DX) { 166 fSampleProc16 = S32_D16_filter_DX_SSE2; 167 } 168 169 /* Check fMatrixProc */ 170 if (fMatrixProc == ClampX_ClampY_filter_scale) { 171 fMatrixProc = ClampX_ClampY_filter_scale_SSE2; 172 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { 173 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; 174 } else if (fMatrixProc == ClampX_ClampY_filter_affine) { 175 fMatrixProc = ClampX_ClampY_filter_affine_SSE2; 176 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { 177 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; 178 } 179 180 /* Check fShaderProc32 */ 181 if (c_hqfilter_sse) { 182 if (fShaderProc32 == highQualityFilter32) { 183 fShaderProc32 = highQualityFilter_SSE2; 184 } 185 } 186} 187 188//////////////////////////////////////////////////////////////////////////////// 189 190static SkBlitRow::Proc platform_16_procs[] = { 191 S32_D565_Opaque_SSE2, // S32_D565_Opaque 192 NULL, // S32_D565_Blend 193 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque 194 NULL, // S32A_D565_Blend 195 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither 196 NULL, // S32_D565_Blend_Dither 197 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither 198 NULL, // S32A_D565_Blend_Dither 199}; 200 201SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { 202 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 203 return platform_16_procs[flags]; 204 } else { 205 return NULL; 206 } 207} 208 209static SkBlitRow::Proc32 platform_32_procs[] = { 210 NULL, // S32_Opaque, 211 S32_Blend_BlitRow32_SSE2, // S32_Blend, 212 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque 213 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, 214}; 215 216SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 217 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 218 return platform_32_procs[flags]; 219 } else { 220 return NULL; 221 } 222} 223 224SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { 225 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 226 return Color32_SSE2; 227 } else { 228 return NULL; 229 } 230} 231 232SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning 233 234SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { 235/* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled. 236 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 237 return ColorRect32_SSE2; 238 } else { 239 return NULL; 240 } 241*/ 242 return NULL; 243} 244 245//////////////////////////////////////////////////////////////////////////////// 246 247SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT, 248 SkMask::Format maskFormat, 249 SkColor color) { 250 if (SkMask::kA8_Format != maskFormat) { 251 return NULL; 252 } 253 254 ColorProc proc = NULL; 255 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 256 switch (dstCT) { 257 case kN32_SkColorType: 258 // The SSE2 version is not (yet) faster for black, so we check 259 // for that. 260 if (SK_ColorBLACK != color) { 261 proc = SkARGB32_A8_BlitMask_SSE2; 262 } 263 break; 264 default: 265 break; 266 } 267 } 268 return proc; 269} 270 271SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { 272 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 273 if (isOpaque) { 274 return SkBlitLCD16OpaqueRow_SSE2; 275 } else { 276 return SkBlitLCD16Row_SSE2; 277 } 278 } else { 279 return NULL; 280 } 281 282} 283 284SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) { 285 return NULL; 286} 287 288//////////////////////////////////////////////////////////////////////////////// 289 290SkMemset16Proc SkMemset16GetPlatformProc() { 291 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 292 return sk_memset16_SSE2; 293 } else { 294 return NULL; 295 } 296} 297 298SkMemset32Proc SkMemset32GetPlatformProc() { 299 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 300 return sk_memset32_SSE2; 301 } else { 302 return NULL; 303 } 304} 305 306SkMemcpy32Proc SkMemcpy32GetPlatformProc() { 307 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 308 return sk_memcpy32_SSE2; 309 } else { 310 return NULL; 311 } 312} 313 314//////////////////////////////////////////////////////////////////////////////// 315 316SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) { 317 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 318 return NULL; 319 } 320 switch (type) { 321 case kDilateX_SkMorphologyProcType: 322 return SkDilateX_SSE2; 323 case kDilateY_SkMorphologyProcType: 324 return SkDilateY_SSE2; 325 case kErodeX_SkMorphologyProcType: 326 return SkErodeX_SSE2; 327 case kErodeY_SkMorphologyProcType: 328 return SkErodeY_SSE2; 329 default: 330 return NULL; 331 } 332} 333 334//////////////////////////////////////////////////////////////////////////////// 335 336bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, 337 SkBoxBlurProc* boxBlurY, 338 SkBoxBlurProc* boxBlurXY, 339 SkBoxBlurProc* boxBlurYX) { 340#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION 341 return false; 342#else 343 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 344 return false; 345 } 346 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX); 347#endif 348} 349 350//////////////////////////////////////////////////////////////////////////////// 351 352extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, 353 SkXfermode::Mode mode); 354 355SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, 356 SkXfermode::Mode mode); 357 358SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, 359 SkXfermode::Mode mode) { 360 return NULL; 361} 362 363SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, 364 SkXfermode::Mode mode); 365 366SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, 367 SkXfermode::Mode mode) { 368 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 369 return SkPlatformXfermodeFactory_impl_SSE2(rec, mode); 370 } else { 371 return SkPlatformXfermodeFactory_impl(rec, mode); 372 } 373} 374 375SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); 376 377SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { 378 return NULL; 379} 380