1/*
2 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkBitmapProcState_opts_SSE2.h"
9#include "SkBitmapProcState_opts_SSSE3.h"
10#include "SkBlitMask.h"
11#include "SkBlitRow_opts_SSE2.h"
12#include "SkUtils_opts_SSE2.h"
13#include "SkUtils.h"
14
15/* This file must *not* be compiled with -msse or -msse2, otherwise
16   gcc may generate sse2 even for scalar ops (and thus give an invalid
17   instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
18   in this directory should be compiled with -msse2. */
19
20
/* getcpuid(info_type, info): executes the x86 CPUID instruction with
   |info_type| in EAX and stores the resulting EAX, EBX, ECX and EDX values in
   info[0], info[1], info[2] and info[3] respectively.

   The inline-assembly variants do not load ECX before issuing CPUID, so they
   are only meaningful for leaves that ignore the sub-leaf index (such as the
   leaf 0 / leaf 1 queries). Exactly one variant is compiled, chosen by
   compiler and target. */
#ifdef _MSC_VER
#if defined(_WIN64)
// MSVC does not accept __asm blocks when targeting x64, so the 64-bit build
// goes through the compiler intrinsic instead.
#include <intrin.h>
static inline void getcpuid(int info_type, int info[4]) {
    __cpuid(info, info_type);
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    __asm {
        mov    eax, [info_type]
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
}
#endif
#else
#if defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    // Each output register is bound directly to its slot in |info|.
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // We save and restore ebx, so this code can be compatible with -fPIC.
    // The EBX result is handed back through EDI ("=D"): a plain "=r" would
    // allow a non-PIC build to pick EBX itself for the operand, in which case
    // the popl below would overwrite the value CPUID just produced.
    asm volatile (
        "pushl %%ebx      \n\t"
        "cpuid            \n\t"
        "movl %%ebx, %1   \n\t"
        "popl %%ebx       \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#endif
#endif
56
#if defined(__x86_64__) || defined(_WIN64)
/* SSE2 is part of the baseline on every x86_64 machine, so the 64-bit build
   answers without querying the CPU. */
static inline bool hasSSE2() {
    return true;
}
#else

/* 32-bit build: query CPUID leaf 1 and test bit 26 of the EDX value, which
   getcpuid() stores in slot 3. */
static inline bool hasSSE2() {
    const int kSSE2Mask = 1 << 26;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    return 0 != (regs[3] & kSSE2Mask);
}
#endif
70
71static inline bool hasSSSE3() {
72    int cpu_info[4] = { 0 };
73    getcpuid(1, cpu_info);
74    return (cpu_info[2] & 0x200) != 0;
75}
76
77static bool cachedHasSSE2() {
78    static bool gHasSSE2 = hasSSE2();
79    return gHasSSE2;
80}
81
82static bool cachedHasSSSE3() {
83    static bool gHasSSSE3 = hasSSSE3();
84    return gHasSSSE3;
85}
86
87void SkBitmapProcState::platformProcs() {
88    if (cachedHasSSSE3()) {
89        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
90            fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
91        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
92            fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
93        }
94    } else if (cachedHasSSE2()) {
95        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
96            fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
97        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
98            fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
99        }
100    }
101
102    if (cachedHasSSSE3() || cachedHasSSE2()) {
103        if (fMatrixProc == ClampX_ClampY_filter_scale) {
104            fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
105        } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
106            fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
107        }
108
109        if (fMatrixProc == ClampX_ClampY_filter_affine) {
110            fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
111        } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
112            fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
113        }
114    }
115}
116
117static SkBlitRow::Proc32 platform_32_procs[] = {
118    NULL,                               // S32_Opaque,
119    S32_Blend_BlitRow32_SSE2,           // S32_Blend,
120    S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
121    S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
122};
123
124SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
125    return NULL;
126}
127
128SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
129    return NULL;
130}
131
132SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
133    if (cachedHasSSE2()) {
134        return Color32_SSE2;
135    } else {
136        return NULL;
137    }
138}
139
140SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
141    if (cachedHasSSE2()) {
142        return platform_32_procs[flags];
143    } else {
144        return NULL;
145    }
146}
147
148
149SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
150                                                     SkMask::Format maskFormat,
151                                                     SkColor color) {
152    if (SkMask::kA8_Format != maskFormat) {
153        return NULL;
154    }
155
156    ColorProc proc = NULL;
157    if (cachedHasSSE2()) {
158        switch (dstConfig) {
159            case SkBitmap::kARGB_8888_Config:
160                // The SSE2 version is not (yet) faster for black, so we check
161                // for that.
162                if (SK_ColorBLACK != color) {
163                    proc = SkARGB32_A8_BlitMask_SSE2;
164                }
165                break;
166            default:
167                break;
168        }
169    }
170    return proc;
171}
172
173SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
174    if (cachedHasSSE2()) {
175        if (isOpaque) {
176            return SkBlitLCD16OpaqueRow_SSE2;
177        } else {
178            return SkBlitLCD16Row_SSE2;
179        }
180    } else {
181        return NULL;
182    }
183
184}
185SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
186                                                 SkMask::Format maskFormat,
187                                                 RowFlags flags) {
188    return NULL;
189}
190
191SkMemset16Proc SkMemset16GetPlatformProc() {
192    if (cachedHasSSE2()) {
193        return sk_memset16_SSE2;
194    } else {
195        return NULL;
196    }
197}
198
199SkMemset32Proc SkMemset32GetPlatformProc() {
200    if (cachedHasSSE2()) {
201        return sk_memset32_SSE2;
202    } else {
203        return NULL;
204    }
205}
206