/*
 * Copyright 2014 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkMath_opts_SSE2_DEFINED
#define SkMath_opts_SSE2_DEFINED

#include <emmintrin.h>

// SSE2 has no _mm_div_epi32(), so packed 32-bit integer division is emulated
// with float division: each lane of a and b is converted to float, divided,
// and the quotient is converted back to a 32-bit integer with truncation.
//
// When using this function, make sure a and b don't exceed float's precision;
// lanes that cannot be represented exactly as a float are rounded by the
// conversion before the division takes place.
static inline __m128i shim_mm_div_epi32(const __m128i& a, const __m128i& b) {
    const __m128 numer = _mm_cvtepi32_ps(a);
    const __m128 denom = _mm_cvtepi32_ps(b);
    // _mm_cvttps_epi32 is the truncating conversion (not round-to-nearest).
    return _mm_cvttps_epi32(_mm_div_ps(numer, denom));
}

// Portable version of SkSqrtBits is in SkMath.cpp.
//
// Runs a bit-by-bit square-root iteration independently on each of the four
// 32-bit lanes of x and returns the four roots.
//
// The loop body executes (count + 1) times. Every pass shifts the two most
// significant remaining bits of x into the running remainder and appends one
// bit to the root, so count == 15 consumes all 32 bits of each lane.
static inline __m128i SkSqrtBits_SSE2(const __m128i& x, int count) {
    const __m128i one128 = _mm_set1_epi32(1);

    __m128i root  = _mm_setzero_si128();
    __m128i remHi = _mm_setzero_si128();
    __m128i remLo = x;

    do {
        root = _mm_slli_epi32(root, 1);

        // Move the next two bits of the input from remLo into remHi.
        remHi = _mm_or_si128(_mm_slli_epi32(remHi, 2),
                             _mm_srli_epi32(remLo, 30));
        remLo = _mm_slli_epi32(remLo, 2);

        // testDiv = 2 * root + 1
        const __m128i testDiv = _mm_add_epi32(_mm_slli_epi32(root, 1), one128);

        // Branch-free per-lane form of:
        //     if (remHi >= testDiv) { remHi -= testDiv; root += 1; }
        // tooSmall is all-ones in lanes where remHi < testDiv (signed
        // compare); those lanes keep their current remHi and root.
        const __m128i tooSmall = _mm_cmplt_epi32(remHi, testDiv);

        const __m128i remKept  = _mm_and_si128(tooSmall, remHi);
        const __m128i remTaken = _mm_andnot_si128(tooSmall,
                                                  _mm_sub_epi32(remHi, testDiv));
        const __m128i rootKept  = _mm_and_si128(tooSmall, root);
        const __m128i rootTaken = _mm_andnot_si128(tooSmall,
                                                   _mm_add_epi32(root, one128));

        remHi = _mm_or_si128(remKept, remTaken);
        root  = _mm_or_si128(rootKept, rootTaken);
    } while (--count >= 0);

    return root;
}

#endif  // SkMath_opts_SSE2_DEFINED