140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger/*
21cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger * Copyright 2009 The Android Open Source Project
31cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger *
41cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger * Use of this source code is governed by a BSD-style license that can be
51cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger * found in the LICENSE file.
640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger */
740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
#include "SkBitmapProcState_opts_SSE2.h"
#include "SkBitmapProcState_opts_SSSE3.h"
#include "SkBlitMask.h"
#include "SkBlitRow_opts_SSE2.h"
#include "SkUtils_opts_SSE2.h"
#include "SkUtils.h"

#if defined(_MSC_VER) && defined(_WIN64)
#include <intrin.h>
#endif
1440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
/* This file must *not* be compiled with -msse or -msse2; otherwise gcc may
   emit SSE2 instructions even for scalar ops, and the code below would then
   raise an invalid-instruction fault on a Pentium 3.  Only files named
   *_SSE2.cpp in this directory should be compiled with -msse2. */
1940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
204f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger
2140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#ifdef _MSC_VER
2240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergerstatic inline void getcpuid(int info_type, int info[4]) {
2340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    __asm {
2440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        mov    eax, [info_type]
2540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        cpuid
2640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        mov    edi, [info]
2740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        mov    [edi], eax
2840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        mov    [edi+4], ebx
2940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        mov    [edi+8], ecx
3040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        mov    [edi+12], edx
3140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    }
3240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
3340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#else
344f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#if defined(__x86_64__)
/**
 *  Runs the CPUID instruction for leaf `info_type` (x86_64, GCC-style asm).
 *
 *  @param info_type  Value loaded into EAX before CPUID executes.
 *  @param info       Receives EAX, EBX, ECX and EDX, in that order, in
 *                    info[0]..info[3].
 */
static inline void getcpuid(int info_type, int info[4]) {
    // ECX is passed as 0 so that leaves which read it as a sub-leaf selector
    // do not see whatever value happened to be in the register.
    __asm__ __volatile__ (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
424f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#else
4340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergerstatic inline void getcpuid(int info_type, int info[4]) {
4440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    // We save and restore ebx, so this code can be compatible with -fPIC
4540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    asm volatile (
4640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        "pushl %%ebx      \n\t"
4740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        "cpuid            \n\t"
4840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        "movl %%ebx, %1   \n\t"
4940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        "popl %%ebx       \n\t"
5040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
5140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        : "a"(info_type)
5240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    );
5340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
5440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#endif
554f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#endif
564f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger
#if defined(__x86_64__) || defined(_WIN64)
/* SSE2 is part of the x86_64 baseline, so no runtime probe is needed. */
static inline bool hasSSE2() {
    return true;
}
#else

/* Queries CPUID leaf 1 and reports whether bit 26 of EDX (SSE2) is set. */
static inline bool hasSSE2() {
    const int kSSE2Mask = 1 << 26;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int edx = regs[3];
    return (edx & kSSE2Mask) != 0;
}
#endif
7040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
714f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenbergerstatic inline bool hasSSSE3() {
724f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    int cpu_info[4] = { 0 };
734f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    getcpuid(1, cpu_info);
744f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    return (cpu_info[2] & 0x200) != 0;
754f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger}
764f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger
771cab2921ab279367f8206cdadc9259d12e603548Derek Sollenbergerstatic bool cachedHasSSE2() {
781cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    static bool gHasSSE2 = hasSSE2();
791cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    return gHasSSE2;
801cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger}
811cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger
824f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenbergerstatic bool cachedHasSSSE3() {
834f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    static bool gHasSSSE3 = hasSSSE3();
844f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    return gHasSSSE3;
854f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger}
864f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger
8740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergervoid SkBitmapProcState::platformProcs() {
884f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    if (cachedHasSSSE3()) {
894f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
904f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
914f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
924f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
934f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        }
944f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    } else if (cachedHasSSE2()) {
9540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
9640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger            fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
9740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
9840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger            fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
9940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        }
10040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    }
1014f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger
1024f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    if (cachedHasSSSE3() || cachedHasSSE2()) {
1034f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        if (fMatrixProc == ClampX_ClampY_filter_scale) {
1044f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
1054f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
1064f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
1074f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        }
1084f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger
1094f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        if (fMatrixProc == ClampX_ClampY_filter_affine) {
1104f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
1114f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
1124f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
1134f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        }
1144f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    }
11540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
11640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
11740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergerstatic SkBlitRow::Proc32 platform_32_procs[] = {
11840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    NULL,                               // S32_Opaque,
11940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    S32_Blend_BlitRow32_SSE2,           // S32_Blend,
12040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
12140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
12240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger};
12340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
12440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
12540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    return NULL;
12640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
12740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
12840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
12940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    return NULL;
13040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
13140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
13240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
1331cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    if (cachedHasSSE2()) {
13440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return Color32_SSE2;
13540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    } else {
13640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return NULL;
13740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    }
13840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
13940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
14040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
1411cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    if (cachedHasSSE2()) {
14240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return platform_32_procs[flags];
14340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    } else {
14440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return NULL;
14540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    }
14640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
14740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
14805b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger
1491cab2921ab279367f8206cdadc9259d12e603548Derek SollenbergerSkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
1501cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                                                     SkMask::Format maskFormat,
1511cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                                                     SkColor color) {
1521cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    if (SkMask::kA8_Format != maskFormat) {
1531cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger        return NULL;
1541cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    }
1551cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger
1561cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    ColorProc proc = NULL;
1571cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    if (cachedHasSSE2()) {
15805b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger        switch (dstConfig) {
15905b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger            case SkBitmap::kARGB_8888_Config:
1601cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                // The SSE2 version is not (yet) faster for black, so we check
1611cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                // for that.
1621cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                if (SK_ColorBLACK != color) {
1631cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                    proc = SkARGB32_A8_BlitMask_SSE2;
1641cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                }
16505b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger                break;
16605b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger            default:
1671cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                break;
16805b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger        }
16905b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger    }
17005b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger    return proc;
17105b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger}
17205b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger
1734f1dae40e24d57d647db01443b8bf2410514b8b5Derek SollenbergerSkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
1744f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    if (cachedHasSSE2()) {
1754f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        if (isOpaque) {
1764f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            return SkBlitLCD16OpaqueRow_SSE2;
1774f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        } else {
1784f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger            return SkBlitLCD16Row_SSE2;
1794f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        }
1804f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    } else {
1814f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger        return NULL;
1824f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger    }
1834f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger
1844f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger}
1851cab2921ab279367f8206cdadc9259d12e603548Derek SollenbergerSkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
1861cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                                                 SkMask::Format maskFormat,
1871cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger                                                 RowFlags flags) {
1881cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    return NULL;
1891cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger}
1901cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger
19140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkMemset16Proc SkMemset16GetPlatformProc() {
1921cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    if (cachedHasSSE2()) {
19340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return sk_memset16_SSE2;
19440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    } else {
19540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return NULL;
19640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    }
19740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
19840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger
19940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkMemset32Proc SkMemset32GetPlatformProc() {
2001cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger    if (cachedHasSSE2()) {
20140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return sk_memset32_SSE2;
20240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    } else {
20340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger        return NULL;
20440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger    }
20540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}
206