18c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// Copyright 2014 Google Inc. All Rights Reserved. 28c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// 38c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// Use of this source code is governed by a BSD-style license 48c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// that can be found in the COPYING file in the root of the source 58c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// tree. An additional intellectual property rights grant can be found 68c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// in the file PATENTS. All contributing project authors may 78c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// be found in the AUTHORS file in the root of the source tree. 88c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// ----------------------------------------------------------------------------- 98c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// 108c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// Utilities for processing transparent channel. 118c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// 128c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// Author: Skal (pascal.massimino@gmail.com) 138c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 148c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#include "./dsp.h" 158c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 168c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#if defined(WEBP_USE_SSE2) 178c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#include <emmintrin.h> 188c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 198c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora//------------------------------------------------------------------------------ 208c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 218c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arorastatic int ExtractAlpha(const uint8_t* argb, int argb_stride, 228c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora int width, int height, 238c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora uint8_t* alpha, int alpha_stride) { 248c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // alpha_and stores an 'and' operation of all the alpha[] values. The final 258c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // value is not 0xff if any of the alpha[] is not equal to 0xff. 268c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora uint32_t alpha_and = 0xff; 278c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora int i, j; 288c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha 298c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); 308c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora __m128i all_alphas = all_0xff; 318c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 328c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // We must be able to access 3 extra bytes after the last written byte 338c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // 'src[4 * width - 4]', because we don't know if alpha is the first or the 348c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // last byte of the quadruplet. 358c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const int limit = (width - 1) & ~7; 368c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 378c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora for (j = 0; j < height; ++j) { 388c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i* src = (const __m128i*)argb; 398c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora for (i = 0; i < limit; i += 8) { 408c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // load 32 argb bytes 418c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i a0 = _mm_loadu_si128(src + 0); 428c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i a1 = _mm_loadu_si128(src + 1); 438c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i b0 = _mm_and_si128(a0, a_mask); 448c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i b1 = _mm_and_si128(a1, a_mask); 458c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i c0 = _mm_packs_epi32(b0, b1); 468c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const __m128i d0 = _mm_packus_epi16(c0, c0); 478c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // store 488c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora _mm_storel_epi64((__m128i*)&alpha[i], d0); 498c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // accumulate eight alpha 'and' in parallel 508c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora all_alphas = _mm_and_si128(all_alphas, d0); 518c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora src += 2; 528c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora } 538c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora for (; i < width; ++i) { 548c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora const uint32_t alpha_value = argb[4 * i]; 558c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora alpha[i] = alpha_value; 568c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora alpha_and &= alpha_value; 578c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora } 588c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora argb += argb_stride; 598c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora alpha += alpha_stride; 608c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora } 618c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora // Combine the eight alpha 'and' into a 8-bit mask. 628c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff)); 638c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora return (alpha_and == 0xff); 648c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora} 658c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 668c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#endif // WEBP_USE_SSE2 678c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 688c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora//------------------------------------------------------------------------------ 698c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora// Init function 708c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 718c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Aroraextern void WebPInitAlphaProcessingSSE2(void); 728c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora 738c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Aroravoid WebPInitAlphaProcessingSSE2(void) { 748c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#if defined(WEBP_USE_SSE2) 758c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora WebPExtractAlpha = ExtractAlpha; 768c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#endif 778c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora} 78