// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author: Skal (pascal.massimino@gmail.com)
138c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora
148c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#include "./dsp.h"
158c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora
168c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#if defined(WEBP_USE_SSE2)
178c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#include <emmintrin.h>
188c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora
198c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora//------------------------------------------------------------------------------
208c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora
// Copies the first byte of every 4-byte pixel of 'argb' into 'alpha', row by
// row, over a 'width' x 'height' area. 'argb_stride' and 'alpha_stride' are
// the distances, in bytes, between the starts of two consecutive rows of the
// source and of the destination respectively.
// Returns true (1) if every extracted value equals 0xff, false (0) otherwise.
// An empty area (width <= 0 or height <= 0) writes nothing and returns 1.
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
                        int width, int height,
                        uint8_t* alpha, int alpha_stride) {
  // alpha_and stores an 'and' operation of the alpha[] values produced by the
  // scalar tail loop. The final value is not 0xff if any of them is not 0xff.
  uint32_t alpha_and = 0xff;
  int i, j;
  // The intrinsics below take 'int' arguments, so plain 'int' constants are
  // used: an unsigned literal such as ~0u would not fit in an 'int' and would
  // rely on an implementation-defined conversion.
  const __m128i a_mask = _mm_set1_epi32(0xff);  // keeps byte 0 of each pixel
  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);  // low 8 bytes = 0xff
  __m128i all_alphas = all_0xff;

  // The vector loop consumes 8 pixels (32 bytes) per iteration and stops at
  // the largest multiple of 8 that is strictly below 'width'. At least one
  // pixel per row is therefore always left to the scalar loop, and the 16-byte
  // loads never reach the last pixel's quadruplet or anything beyond it.
  const int limit = (width - 1) & ~7;

  for (j = 0; j < height; ++j) {
    const __m128i* src = (const __m128i*)argb;
    for (i = 0; i < limit; i += 8) {
      // load 32 argb bytes
      const __m128i a0 = _mm_loadu_si128(src + 0);
      const __m128i a1 = _mm_loadu_si128(src + 1);
      // isolate the alpha byte in each 32-bit lane
      const __m128i b0 = _mm_and_si128(a0, a_mask);
      const __m128i b1 = _mm_and_si128(a1, a_mask);
      // narrow 32b -> 16b -> 8b. Values are <= 0xff so no saturation occurs.
      const __m128i c0 = _mm_packs_epi32(b0, b1);
      const __m128i d0 = _mm_packus_epi16(c0, c0);
      // store the eight alpha bytes (lower 64 bits of d0)
      _mm_storel_epi64((__m128i*)&alpha[i], d0);
      // accumulate eight alpha 'and' in parallel
      all_alphas = _mm_and_si128(all_alphas, d0);
      src += 2;
    }
    // Left-over pixels (always at least one when width > 0).
    for (; i < width; ++i) {
      const uint32_t alpha_value = argb[4 * i];
      alpha[i] = alpha_value;
      alpha_and &= alpha_value;
    }
    argb += argb_stride;
    alpha += alpha_stride;
  }
  // Combine the eight alpha 'and' into a 8-bit mask: bit k of the movemask is
  // set iff byte k of all_alphas is still 0xff. The upper 8 bytes are zero in
  // both operands and always compare equal, but alpha_and never exceeds 0xff,
  // so only the lower 8 mask bits survive the 'and'.
  alpha_and &=
      (uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
  return (alpha_and == 0xff);
}
658c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora
668c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora#endif   // WEBP_USE_SSE2
678c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora
//------------------------------------------------------------------------------
// Init function
708c098653157979e397d3954fc2ea0ee43bae6ab2Vikas Arora
// Prototype for the SSE2 alpha-processing initializer defined just below.
extern void WebPInitAlphaProcessingSSE2(void);

// Points WebPExtractAlpha at the SSE2 ExtractAlpha() when this file is built
// with WEBP_USE_SSE2 defined; otherwise the call does nothing.
void WebPInitAlphaProcessingSSE2(void) {
#ifdef WEBP_USE_SSE2
  WebPExtractAlpha = ExtractAlpha;
#endif  // WEBP_USE_SSE2
}
78