opts_check_SSE2.cpp revision 80bacfeb4bda06541e8695bd502229727bccfeab
1/*
2 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkBitmapProcState_opts_SSE2.h"
9#include "SkBitmapProcState_opts_SSSE3.h"
10#include "SkBlitMask.h"
11#include "SkBlitRow.h"
12#include "SkBlitRect_opts_SSE2.h"
13#include "SkBlitRow_opts_SSE2.h"
14#include "SkUtils_opts_SSE2.h"
15#include "SkUtils.h"
16
17#if defined(_MSC_VER) && defined(_WIN64)
18#include <intrin.h>
19#endif
20
21/* This file must *not* be compiled with -msse or -msse2, otherwise
22   gcc may generate sse2 even for scalar ops (and thus give an invalid
23   instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
24   in this directory should be compiled with -msse2. */
25
26
/* getcpuid(info_type, info)
 *
 * Executes the CPUID instruction for leaf `info_type`, with the sub-leaf
 * selector (ECX) cleared, and stores the resulting EAX, EBX, ECX and EDX
 * register values in info[0], info[1], info[2] and info[3] respectively.
 *
 * One implementation is selected per compiler / word size below; all of them
 * share this contract.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // 64-bit MSVC: use the compiler intrinsic declared in <intrin.h>.
    __cpuid(info, info_type);
#else
    // 32-bit MSVC: inline assembly. ECX is zeroed first so that the result
    // does not depend on whatever the register happened to contain.
    __asm {
        mov    eax, [info_type]
        xor    ecx, ecx
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#else
static inline void getcpuid(int info_type, int info[4]) {
#if defined(__x86_64__)
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
#else
    // We save and restore ebx, so this code can be compatible with -fPIC.
    // The EBX result is handed back through EDI ("=D"): with a generic "=r"
    // operand the compiler is free to choose EBX itself, in which case
    // restoring EBX would overwrite the CPUID result with the saved value.
    asm volatile (
        "movl %%ebx, %%edi   \n\t"   // stash the caller's ebx
        "cpuid               \n\t"
        "xchgl %%ebx, %%edi  \n\t"   // edi = cpuid ebx, ebx = caller's value
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
#endif
}
#endif
66
/* Reports whether SSE2 instructions may be used on the running CPU. */
static inline bool hasSSE2() {
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
    // All x86_64 machines have SSE2, or we know it's supported at compile
    // time, so don't even bother checking.
    return true;
#else
    // Runtime probe: CPUID leaf 1, bit 26 of the fourth result word
    // (the EDX value stored by getcpuid).
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int kSSE2Mask = 1 << 26;
    return 0 != (regs[3] & kSSE2Mask);
#endif
}
80
/* Reports whether SSSE3 instructions may be used on the running CPU. */
static inline bool hasSSSE3() {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
    // If we know SSSE3 is supported at compile time, don't even bother
    // checking.
    return true;
#else
    // Runtime probe: CPUID leaf 1, mask 0x200 (bit 9) of the third result
    // word (the ECX value stored by getcpuid).
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int kSSSE3Mask = 0x200;
    return 0 != (regs[2] & kSSSE3Mask);
#endif
}
94
95static bool cachedHasSSE2() {
96    static bool gHasSSE2 = hasSSE2();
97    return gHasSSE2;
98}
99
100static bool cachedHasSSSE3() {
101    static bool gHasSSSE3 = hasSSSE3();
102    return gHasSSSE3;
103}
104
105void SkBitmapProcState::platformProcs() {
106    if (cachedHasSSSE3()) {
107#if !defined(SK_BUILD_FOR_ANDROID)
108        // Disable SSSE3 optimization for Android x86
109        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
110            fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
111        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
112            fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
113        }
114
115        if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
116            fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
117        } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
118            fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
119        }
120#endif
121    } else if (cachedHasSSE2()) {
122        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
123            fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
124        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
125            fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
126        }
127
128        if (fSampleProc16 == S32_D16_filter_DX) {
129            fSampleProc16 = S32_D16_filter_DX_SSE2;
130        }
131    }
132
133    if (cachedHasSSSE3() || cachedHasSSE2()) {
134        if (fMatrixProc == ClampX_ClampY_filter_scale) {
135            fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
136        } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
137            fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
138        }
139
140        if (fMatrixProc == ClampX_ClampY_filter_affine) {
141            fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
142        } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
143            fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
144        }
145    }
146}
147
148static SkBlitRow::Proc32 platform_32_procs[] = {
149    NULL,                               // S32_Opaque,
150    S32_Blend_BlitRow32_SSE2,           // S32_Blend,
151    S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
152    S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
153};
154
155SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
156    return NULL;
157}
158
159SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
160    return NULL;
161}
162
163SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
164    if (cachedHasSSE2()) {
165        return Color32_SSE2;
166    } else {
167        return NULL;
168    }
169}
170
171SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
172    if (cachedHasSSE2()) {
173        return platform_32_procs[flags];
174    } else {
175        return NULL;
176    }
177}
178
179
180SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
181                                                     SkMask::Format maskFormat,
182                                                     SkColor color) {
183    if (SkMask::kA8_Format != maskFormat) {
184        return NULL;
185    }
186
187    ColorProc proc = NULL;
188    if (cachedHasSSE2()) {
189        switch (dstConfig) {
190            case SkBitmap::kARGB_8888_Config:
191                // The SSE2 version is not (yet) faster for black, so we check
192                // for that.
193                if (SK_ColorBLACK != color) {
194                    proc = SkARGB32_A8_BlitMask_SSE2;
195                }
196                break;
197            default:
198                break;
199        }
200    }
201    return proc;
202}
203
204SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
205    if (cachedHasSSE2()) {
206        if (isOpaque) {
207            return SkBlitLCD16OpaqueRow_SSE2;
208        } else {
209            return SkBlitLCD16Row_SSE2;
210        }
211    } else {
212        return NULL;
213    }
214
215}
216SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
217                                                 SkMask::Format maskFormat,
218                                                 RowFlags flags) {
219    return NULL;
220}
221
222SkMemset16Proc SkMemset16GetPlatformProc() {
223    if (cachedHasSSE2()) {
224        return sk_memset16_SSE2;
225    } else {
226        return NULL;
227    }
228}
229
230SkMemset32Proc SkMemset32GetPlatformProc() {
231    if (cachedHasSSE2()) {
232        return sk_memset32_SSE2;
233    } else {
234        return NULL;
235    }
236}
237
238SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
239
240SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
241    if (cachedHasSSE2()) {
242        return ColorRect32_SSE2;
243    } else {
244        return NULL;
245    }
246}
247
248
249