opts_check_SSE2.cpp revision 80bacfeb4bda06541e8695bd502229727bccfeab
1/* 2 * Copyright 2009 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkBitmapProcState_opts_SSE2.h" 9#include "SkBitmapProcState_opts_SSSE3.h" 10#include "SkBlitMask.h" 11#include "SkBlitRow.h" 12#include "SkBlitRect_opts_SSE2.h" 13#include "SkBlitRow_opts_SSE2.h" 14#include "SkUtils_opts_SSE2.h" 15#include "SkUtils.h" 16 17#if defined(_MSC_VER) && defined(_WIN64) 18#include <intrin.h> 19#endif 20 21/* This file must *not* be compiled with -msse or -msse2, otherwise 22 gcc may generate sse2 even for scalar ops (and thus give an invalid 23 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp 24 in this directory should be compiled with -msse2. */ 25 26 27#ifdef _MSC_VER 28static inline void getcpuid(int info_type, int info[4]) { 29#if defined(_WIN64) 30 __cpuid(info, info_type); 31#else 32 __asm { 33 mov eax, [info_type] 34 cpuid 35 mov edi, [info] 36 mov [edi], eax 37 mov [edi+4], ebx 38 mov [edi+8], ecx 39 mov [edi+12], edx 40 } 41#endif 42} 43#else 44#if defined(__x86_64__) 45static inline void getcpuid(int info_type, int info[4]) { 46 asm volatile ( 47 "cpuid \n\t" 48 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) 49 : "a"(info_type) 50 ); 51} 52#else 53static inline void getcpuid(int info_type, int info[4]) { 54 // We save and restore ebx, so this code can be compatible with -fPIC 55 asm volatile ( 56 "pushl %%ebx \n\t" 57 "cpuid \n\t" 58 "movl %%ebx, %1 \n\t" 59 "popl %%ebx \n\t" 60 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) 61 : "a"(info_type) 62 ); 63} 64#endif 65#endif 66 67#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 68/* All x86_64 machines have SSE2, or we know it's supported at compile time, so don't even bother checking. */ 69static inline bool hasSSE2() { 70 return true; 71} 72#else 73 74static inline bool hasSSE2() { 75 int cpu_info[4] = { 0 }; 76 getcpuid(1, cpu_info); 77 return (cpu_info[3] & (1<<26)) != 0; 78} 79#endif 80 81#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 82/* If we know SSSE3 is supported at compile time, don't even bother checking. */ 83static inline bool hasSSSE3() { 84 return true; 85} 86#else 87 88static inline bool hasSSSE3() { 89 int cpu_info[4] = { 0 }; 90 getcpuid(1, cpu_info); 91 return (cpu_info[2] & 0x200) != 0; 92} 93#endif 94 95static bool cachedHasSSE2() { 96 static bool gHasSSE2 = hasSSE2(); 97 return gHasSSE2; 98} 99 100static bool cachedHasSSSE3() { 101 static bool gHasSSSE3 = hasSSSE3(); 102 return gHasSSSE3; 103} 104 105void SkBitmapProcState::platformProcs() { 106 if (cachedHasSSSE3()) { 107#if !defined(SK_BUILD_FOR_ANDROID) 108 // Disable SSSE3 optimization for Android x86 109 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 110 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; 111 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 112 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; 113 } 114 115 if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { 116 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; 117 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { 118 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; 119 } 120#endif 121 } else if (cachedHasSSE2()) { 122 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 123 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; 124 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 125 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; 126 } 127 128 if (fSampleProc16 == S32_D16_filter_DX) { 129 fSampleProc16 = S32_D16_filter_DX_SSE2; 130 } 131 } 132 133 if (cachedHasSSSE3() || cachedHasSSE2()) { 134 if (fMatrixProc == ClampX_ClampY_filter_scale) { 135 fMatrixProc = ClampX_ClampY_filter_scale_SSE2; 136 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { 137 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; 138 } 139 140 if (fMatrixProc == ClampX_ClampY_filter_affine) { 141 fMatrixProc = ClampX_ClampY_filter_affine_SSE2; 142 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { 143 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; 144 } 145 } 146} 147 148static SkBlitRow::Proc32 platform_32_procs[] = { 149 NULL, // S32_Opaque, 150 S32_Blend_BlitRow32_SSE2, // S32_Blend, 151 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque 152 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, 153}; 154 155SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) { 156 return NULL; 157} 158 159SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { 160 return NULL; 161} 162 163SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { 164 if (cachedHasSSE2()) { 165 return Color32_SSE2; 166 } else { 167 return NULL; 168 } 169} 170 171SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 172 if (cachedHasSSE2()) { 173 return platform_32_procs[flags]; 174 } else { 175 return NULL; 176 } 177} 178 179 180SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, 181 SkMask::Format maskFormat, 182 SkColor color) { 183 if (SkMask::kA8_Format != maskFormat) { 184 return NULL; 185 } 186 187 ColorProc proc = NULL; 188 if (cachedHasSSE2()) { 189 switch (dstConfig) { 190 case SkBitmap::kARGB_8888_Config: 191 // The SSE2 version is not (yet) faster for black, so we check 192 // for that. 193 if (SK_ColorBLACK != color) { 194 proc = SkARGB32_A8_BlitMask_SSE2; 195 } 196 break; 197 default: 198 break; 199 } 200 } 201 return proc; 202} 203 204SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { 205 if (cachedHasSSE2()) { 206 if (isOpaque) { 207 return SkBlitLCD16OpaqueRow_SSE2; 208 } else { 209 return SkBlitLCD16Row_SSE2; 210 } 211 } else { 212 return NULL; 213 } 214 215} 216SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, 217 SkMask::Format maskFormat, 218 RowFlags flags) { 219 return NULL; 220} 221 222SkMemset16Proc SkMemset16GetPlatformProc() { 223 if (cachedHasSSE2()) { 224 return sk_memset16_SSE2; 225 } else { 226 return NULL; 227 } 228} 229 230SkMemset32Proc SkMemset32GetPlatformProc() { 231 if (cachedHasSSE2()) { 232 return sk_memset32_SSE2; 233 } else { 234 return NULL; 235 } 236} 237 238SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning 239 240SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { 241 if (cachedHasSSE2()) { 242 return ColorRect32_SSE2; 243 } else { 244 return NULL; 245 } 246} 247 248 249