/*
 * Copyright 2009 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
74e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org
84e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org#include <emmintrin.h>
94e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org#include "SkUtils_opts_SSE2.h"
10fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
114e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.orgvoid sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count)
124e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org{
134e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    SkASSERT(dst != NULL && count >= 0);
144e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org
154e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    // dst must be 2-byte aligned.
164e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    SkASSERT((((size_t) dst) & 0x01) == 0);
174e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org
184e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    if (count >= 32) {
194e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        while (((size_t)dst) & 0x0F) {
204e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org            *dst++ = value;
214e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org            --count;
224e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        }
234e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        __m128i *d = reinterpret_cast<__m128i*>(dst);
244e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        __m128i value_wide = _mm_set1_epi16(value);
254e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        while (count >= 32) {
269772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d    , value_wide);
279772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d + 1, value_wide);
289772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d + 2, value_wide);
299772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d + 3, value_wide);
309772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            d += 4;
314e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org            count -= 32;
324e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        }
334e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        dst = reinterpret_cast<uint16_t*>(d);
344e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    }
354e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    while (count > 0) {
364e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        *dst++ = value;
374e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        --count;
384e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    }
394e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org}
40fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
414e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.orgvoid sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
424e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org{
434e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    SkASSERT(dst != NULL && count >= 0);
444e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org
454e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    // dst must be 4-byte aligned.
464e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    SkASSERT((((size_t) dst) & 0x03) == 0);
474e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org
484e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    if (count >= 16) {
494e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        while (((size_t)dst) & 0x0F) {
504e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org            *dst++ = value;
514e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org            --count;
524e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        }
534e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        __m128i *d = reinterpret_cast<__m128i*>(dst);
544e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        __m128i value_wide = _mm_set1_epi32(value);
554e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        while (count >= 16) {
569772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d    , value_wide);
579772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d + 1, value_wide);
589772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d + 2, value_wide);
599772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            _mm_store_si128(d + 3, value_wide);
609772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org            d += 4;
614e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org            count -= 16;
624e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        }
634e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        dst = reinterpret_cast<uint32_t*>(d);
644e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    }
654e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    while (count > 0) {
664e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        *dst++ = value;
674e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org        --count;
684e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org    }
694e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org}
70f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org
/**
 *  Copies `count` 32-bit elements from `src` to `dst`.
 *
 *  Runs of 16 or more elements are handled in three phases: a scalar head that
 *  copies element by element until `dst` sits on a 16-byte boundary, a vector
 *  body that moves 16 elements (four 128-bit vectors) per iteration, and a
 *  scalar tail for whatever is left. Shorter runs use the scalar tail only.
 *
 *  Only `dst` is brought to a 16-byte boundary; `src` is read with unaligned
 *  loads, so its alignment relative to `dst` does not matter.
 *
 *  @param dst    Destination buffer.
 *  @param src    Source buffer.
 *  @param count  Number of 32-bit elements to copy; values <= 0 copy nothing.
 */
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
{
    if (count >= 16) {
        // Head: scalar copies until the low four bits of the dst address are clear.
        for (; ((size_t)dst) & 0x0F; --count) {
            *dst++ = *src++;
        }

        const __m128i *in = reinterpret_cast<const __m128i*>(src);
        __m128i *out = reinterpret_cast<__m128i*>(dst);

        // Body: four unaligned loads followed by four aligned stores per pass.
        for (; count >= 16; count -= 16) {
            const __m128i v0 = _mm_loadu_si128(in + 0);
            const __m128i v1 = _mm_loadu_si128(in + 1);
            const __m128i v2 = _mm_loadu_si128(in + 2);
            const __m128i v3 = _mm_loadu_si128(in + 3);
            in += 4;

            _mm_store_si128(out + 0, v0);
            _mm_store_si128(out + 1, v1);
            _mm_store_si128(out + 2, v2);
            _mm_store_si128(out + 3, v3);
            out += 4;
        }

        src = reinterpret_cast<const uint32_t*>(in);
        dst = reinterpret_cast<uint32_t*>(out);
    }

    // Tail: remaining elements (or the whole run when count < 16).
    for (; count > 0; --count) {
        *dst++ = *src++;
    }
}
100