140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger/* 21cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger * Copyright 2009 The Android Open Source Project 31cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger * 41cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger * Use of this source code is governed by a BSD-style license that can be 51cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger * found in the LICENSE file. 640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger */ 740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#include "SkBitmapProcState_opts_SSE2.h" 94f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#include "SkBitmapProcState_opts_SSSE3.h" 101cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger#include "SkBlitMask.h" 1140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#include "SkBlitRow_opts_SSE2.h" 1240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#include "SkUtils_opts_SSE2.h" 1340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#include "SkUtils.h" 1440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 1540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger/* This file must *not* be compiled with -msse or -msse2, otherwise 1640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger gcc may generate sse2 even for scalar ops (and thus give an invalid 1740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger instruction on Pentium3 on the code below). Only files named *_SSE2.cpp 1840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger in this directory should be compiled with -msse2. */ 1940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 204f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger 2140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#ifdef _MSC_VER 2240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergerstatic inline void getcpuid(int info_type, int info[4]) { 2340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger __asm { 2440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger mov eax, [info_type] 2540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger cpuid 2640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger mov edi, [info] 2740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger mov [edi], eax 2840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger mov [edi+4], ebx 2940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger mov [edi+8], ecx 3040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger mov [edi+12], edx 3140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } 3240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 3340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#else 344f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#if defined(__x86_64__) 354f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenbergerstatic inline void getcpuid(int info_type, int info[4]) { 364f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger asm volatile ( 374f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger "cpuid \n\t" 384f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) 394f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger : "a"(info_type) 404f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger ); 414f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger} 424f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#else 4340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergerstatic inline void getcpuid(int info_type, int info[4]) { 4440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger // We save and restore ebx, so this code can be compatible with -fPIC 4540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger asm volatile ( 4640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger "pushl %%ebx \n\t" 4740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger "cpuid \n\t" 4840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger "movl %%ebx, %1 \n\t" 4940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger "popl %%ebx \n\t" 5040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) 5140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger : "a"(info_type) 5240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger ); 5340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 5440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#endif 554f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#endif 564f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger 574f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#if defined(__x86_64__) || defined(_WIN64) 584f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger/* All x86_64 machines have SSE2, so don't even bother checking. */ 594f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenbergerstatic inline bool hasSSE2() { 604f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger return true; 614f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger} 624f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger#else 6340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 6440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergerstatic inline bool hasSSE2() { 6540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger int cpu_info[4] = { 0 }; 6640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger getcpuid(1, cpu_info); 6740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return (cpu_info[3] & (1<<26)) != 0; 6840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 6940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger#endif 7040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 714f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenbergerstatic inline bool hasSSSE3() { 724f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger int cpu_info[4] = { 0 }; 734f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger getcpuid(1, cpu_info); 744f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger return (cpu_info[2] & 0x200) != 0; 754f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger} 764f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger 771cab2921ab279367f8206cdadc9259d12e603548Derek Sollenbergerstatic bool cachedHasSSE2() { 781cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger static bool gHasSSE2 = hasSSE2(); 791cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger return gHasSSE2; 801cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger} 811cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger 824f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenbergerstatic bool cachedHasSSSE3() { 834f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger static bool gHasSSSE3 = hasSSSE3(); 844f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger return gHasSSSE3; 854f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger} 864f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger 8740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergervoid SkBitmapProcState::platformProcs() { 884f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger if (cachedHasSSSE3()) { 894f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger if (fSampleProc32 == S32_opaque_D32_filter_DX) { 904f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; 914f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 924f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; 934f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } 944f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } else if (cachedHasSSE2()) { 9540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger if (fSampleProc32 == S32_opaque_D32_filter_DX) { 9640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; 9740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 9840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; 9940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } 10040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } 1014f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger 1024f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger if (cachedHasSSSE3() || cachedHasSSE2()) { 1034f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger if (fMatrixProc == ClampX_ClampY_filter_scale) { 1044f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger fMatrixProc = ClampX_ClampY_filter_scale_SSE2; 1054f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { 1064f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; 1074f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } 1084f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger 1094f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger if (fMatrixProc == ClampX_ClampY_filter_affine) { 1104f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger fMatrixProc = ClampX_ClampY_filter_affine_SSE2; 1114f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { 1124f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; 1134f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } 1144f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } 11540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 11640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 11740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenbergerstatic SkBlitRow::Proc32 platform_32_procs[] = { 11840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger NULL, // S32_Opaque, 11940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger S32_Blend_BlitRow32_SSE2, // S32_Blend, 12040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque 12140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger S32A_Blend_BlitRow32_SSE2, // S32A_Blend, 12240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger}; 12340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 12440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) { 12540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return NULL; 12640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 12740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 12840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { 12940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return NULL; 13040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 13140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 13240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { 1331cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger if (cachedHasSSE2()) { 13440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return Color32_SSE2; 13540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } else { 13640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return NULL; 13740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } 13840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 13940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 14040528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 1411cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger if (cachedHasSSE2()) { 14240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return platform_32_procs[flags]; 14340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } else { 14440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return NULL; 14540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } 14640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 14740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 14805b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger 1491cab2921ab279367f8206cdadc9259d12e603548Derek SollenbergerSkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, 1501cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger SkMask::Format maskFormat, 1511cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger SkColor color) { 1521cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger if (SkMask::kA8_Format != maskFormat) { 1531cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger return NULL; 1541cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger } 1551cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger 1561cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger ColorProc proc = NULL; 1571cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger if (cachedHasSSE2()) { 15805b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger switch (dstConfig) { 15905b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger case SkBitmap::kARGB_8888_Config: 1601cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger // The SSE2 version is not (yet) faster for black, so we check 1611cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger // for that. 1621cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger if (SK_ColorBLACK != color) { 1631cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger proc = SkARGB32_A8_BlitMask_SSE2; 1641cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger } 16505b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger break; 16605b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger default: 1671cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger break; 16805b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger } 16905b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger } 17005b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger return proc; 17105b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger} 17205b6b4d746867a9fb02e14edfe1bf3685abeb813Derek Sollenberger 1734f1dae40e24d57d647db01443b8bf2410514b8b5Derek SollenbergerSkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { 1744f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger if (cachedHasSSE2()) { 1754f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger if (isOpaque) { 1764f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger return SkBlitLCD16OpaqueRow_SSE2; 1774f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } else { 1784f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger return SkBlitLCD16Row_SSE2; 1794f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } 1804f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } else { 1814f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger return NULL; 1824f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger } 1834f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger 1844f1dae40e24d57d647db01443b8bf2410514b8b5Derek Sollenberger} 1851cab2921ab279367f8206cdadc9259d12e603548Derek SollenbergerSkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, 1861cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger SkMask::Format maskFormat, 1871cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger RowFlags flags) { 1881cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger return NULL; 1891cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger} 1901cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger 19140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkMemset16Proc SkMemset16GetPlatformProc() { 1921cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger if (cachedHasSSE2()) { 19340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return sk_memset16_SSE2; 19440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } else { 19540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return NULL; 19640528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } 19740528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 19840528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger 19940528743dbb9ce7f39f093e0cdc47849ac8887cfDerek SollenbergerSkMemset32Proc SkMemset32GetPlatformProc() { 2001cab2921ab279367f8206cdadc9259d12e603548Derek Sollenberger if (cachedHasSSE2()) { 20140528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return sk_memset32_SSE2; 20240528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } else { 20340528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger return NULL; 20440528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger } 20540528743dbb9ce7f39f093e0cdc47849ac8887cfDerek Sollenberger} 206