14e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org/* 2ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Copyright 2009 The Android Open Source Project 3ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * 4ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * Use of this source code is governed by a BSD-style license that can be 5ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976eepoger@google.com * found in the LICENSE file. 64e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org */ 74e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org 84e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org#include <emmintrin.h> 94e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org#include "SkUtils_opts_SSE2.h" 10fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 114e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.orgvoid sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count) 124e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org{ 134e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org SkASSERT(dst != NULL && count >= 0); 144e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org 154e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org // dst must be 2-byte aligned. 164e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org SkASSERT((((size_t) dst) & 0x01) == 0); 174e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org 184e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org if (count >= 32) { 194e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org while (((size_t)dst) & 0x0F) { 204e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org *dst++ = value; 214e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org --count; 224e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 234e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org __m128i *d = reinterpret_cast<__m128i*>(dst); 244e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org __m128i value_wide = _mm_set1_epi16(value); 254e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org while (count >= 32) { 269772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d , value_wide); 279772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d + 1, value_wide); 289772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d + 2, value_wide); 299772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d + 3, value_wide); 309772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org d += 4; 314e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org count -= 32; 324e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 334e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org dst = reinterpret_cast<uint16_t*>(d); 344e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 354e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org while (count > 0) { 364e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org *dst++ = value; 374e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org --count; 384e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 394e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org} 40fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 414e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.orgvoid sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) 424e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org{ 434e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org SkASSERT(dst != NULL && count >= 0); 444e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org 454e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org // dst must be 4-byte aligned. 464e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org SkASSERT((((size_t) dst) & 0x03) == 0); 474e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org 484e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org if (count >= 16) { 494e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org while (((size_t)dst) & 0x0F) { 504e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org *dst++ = value; 514e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org --count; 524e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 534e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org __m128i *d = reinterpret_cast<__m128i*>(dst); 544e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org __m128i value_wide = _mm_set1_epi32(value); 554e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org while (count >= 16) { 569772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d , value_wide); 579772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d + 1, value_wide); 589772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d + 2, value_wide); 599772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org _mm_store_si128(d + 3, value_wide); 609772a52f0d9e540d2a360dde2aab0ad41c90b1d8commit-bot@chromium.org d += 4; 614e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org count -= 16; 624e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 634e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org dst = reinterpret_cast<uint32_t*>(d); 644e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 654e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org while (count > 0) { 664e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org *dst++ = value; 674e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org --count; 684e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org } 694e753558fc8cc2f77cbcd46fba80d8612e836a1esenorblanco@chromium.org} 70f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org 71f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.orgvoid sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count) 72f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org{ 73f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org if (count >= 16) { 74f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org while (((size_t)dst) & 0x0F) { 75f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org *dst++ = *src++; 76f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org --count; 77f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org } 78f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org __m128i *dst128 = reinterpret_cast<__m128i*>(dst); 79f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org const __m128i *src128 = reinterpret_cast<const __m128i*>(src); 80f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org while (count >= 16) { 81f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org __m128i a = _mm_loadu_si128(src128++); 82f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org __m128i b = _mm_loadu_si128(src128++); 83f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org __m128i c = _mm_loadu_si128(src128++); 84f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org __m128i d = _mm_loadu_si128(src128++); 85f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org 86f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org _mm_store_si128(dst128++, a); 87f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org _mm_store_si128(dst128++, b); 88f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org _mm_store_si128(dst128++, c); 89f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org _mm_store_si128(dst128++, d); 90f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org count -= 16; 91f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org } 92f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org dst = reinterpret_cast<uint32_t*>(dst128); 93f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org src = reinterpret_cast<const uint32_t*>(src128); 94f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org } 95f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org while (count > 0) { 96f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org *dst++ = *src++; 97f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org --count; 98f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org } 99f0ea77a3630e6d1c01d83aa5430b3780da9e88b6commit-bot@chromium.org} 100