/*
 * Copyright 2014 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
76419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org
86419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org#ifndef SkMath_opts_SSE2_DEFINED
96419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org#define SkMath_opts_SSE2_DEFINED
106419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org
116419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org#include <emmintrin.h>
126419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org
// SSE2 has no _mm_div_epi32(), so this emulates a per-lane signed 32-bit
// division with float math: both operands are converted to single-precision
// floats, divided, and the quotient is truncated toward zero.
//
// When using this function, make sure a and b don't exceed float's precision:
// a float only represents integers exactly up to 2^24 in magnitude, so larger
// lanes are rounded during conversion and the quotient may be inexact.
// Lanes whose divisor is zero do not trap; _mm_cvttps_epi32 returns 0x80000000
// for a quotient it cannot represent (infinity or NaN).
static inline __m128i shim_mm_div_epi32(const __m128i& a, const __m128i& b) {
    __m128 x = _mm_cvtepi32_ps(a);
    __m128 y = _mm_cvtepi32_ps(b);
    return _mm_cvttps_epi32(_mm_div_ps(x, y));
}
20e1ba93ee01aa7df27197189ab4d82a7d5387dc8acommit-bot@chromium.org
// SSE2 version of SkSqrtBits; the portable version is in SkMath.cpp.
//
// Runs a bit-by-bit square root on each of the four 32-bit lanes of x in
// parallel. The loop body executes count + 1 times. Every pass moves the next
// two most-significant bits of the input into the running remainder and
// appends one bit to the result, so count == 15 consumes all 32 input bits and
// larger counts keep going with zero bits shifted in (extra result bits).
//
// The remainder test uses a signed compare (_mm_cmplt_epi32), so lanes are
// expected to hold non-negative values.
// NOTE(review): presumably shares the preconditions of the portable
// SkSqrtBits -- confirm against SkMath.cpp.
static inline __m128i SkSqrtBits_SSE2(const __m128i& x, int count) {
    const __m128i one128 = _mm_set1_epi32(1);

    __m128i root = _mm_setzero_si128();
    __m128i remHi = _mm_setzero_si128();
    __m128i remLo = x;

    do {
        root = _mm_slli_epi32(root, 1);

        // Shift the top two bits of remLo into the bottom of remHi.
        remHi = _mm_or_si128(_mm_slli_epi32(remHi, 2),
                             _mm_srli_epi32(remLo, 30));
        remLo = _mm_slli_epi32(remLo, 2);

        // testDiv = 2 * root + 1
        const __m128i testDiv = _mm_add_epi32(_mm_slli_epi32(root, 1), one128);

        // There is no per-lane branch, so select with a mask instead:
        // lanes where remHi < testDiv (mask all ones) are left untouched,
        // the others get remHi -= testDiv and root += 1.
        const __m128i keep = _mm_cmplt_epi32(remHi, testDiv);
        remHi = _mm_sub_epi32(remHi, _mm_andnot_si128(keep, testDiv));
        root = _mm_add_epi32(root, _mm_andnot_si128(keep, one128));
    } while (--count >= 0);

    return root;
}
506419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org
516419a5edb8af999623e018b09c3dd88ec371d2e6commit-bot@chromium.org#endif // SkMath_opts_SSE2_DEFINED
52