1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// Utilities for processing transparent channel.
11//
12// Author: Skal (pascal.massimino@gmail.com)
13
14#include "./dsp.h"
15
16#if defined(WEBP_USE_SSE2)
17#include <emmintrin.h>
18
19//------------------------------------------------------------------------------
20
// Copies one byte out of every 4-byte pixel of 'argb' into the compact 'alpha'
// plane, for 'height' rows of 'width' pixels. The byte taken for pixel 'x' is
// 'argb[4 * x]'. 'argb_stride' and 'alpha_stride' are the byte distances
// between consecutive rows of the source and destination respectively.
// Returns 1 if every copied value equals 0xff (also when nothing is copied),
// and 0 otherwise.
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
                        int width, int height,
                        uint8_t* alpha, int alpha_stride) {
  // Keeps the lowest byte of each 32-bit lane, i.e. the byte at 'argb[4 * x]'.
  const __m128i keep_low_byte = _mm_set1_epi32(0xff);
  // 0xff in the eight low bytes, zero in the eight high bytes. It is both the
  // initial value of the vector accumulator and the reference it is compared
  // against at the end: the high bytes stay zero on both sides, so they always
  // compare equal and never influence the low 8 bits of the final mask.
  const __m128i opaque_ref = _mm_set_epi32(0, 0, ~0, ~0);
  // The vector loop handles 8 pixels (32 source bytes) per iteration and is
  // capped at (width - 1) rounded down to a multiple of 8. Its loads therefore
  // end before 'argb[4 * width - 4]', which is the last byte the scalar loop
  // reads. The alpha byte may be the first or the last byte of the pixel
  // quadruplet, so up to 3 bytes past that position may belong to the row and
  // nothing further can be assumed readable.
  const int vector_end = (width - 1) & ~7;
  __m128i vector_and = opaque_ref;  // running 'and' of the vector-loop alphas
  uint32_t scalar_and = 0xff;       // running 'and' of the scalar-loop alphas
  int rows_left;
  int lanes;

  for (rows_left = height; rows_left > 0; --rows_left) {
    int x = 0;
    while (x < vector_end) {
      // Load 8 pixels as two unaligned 16-byte halves.
      const __m128i px_lo = _mm_loadu_si128((const __m128i*)(argb + 4 * x));
      const __m128i px_hi =
          _mm_loadu_si128((const __m128i*)(argb + 4 * x + 16));
      // Isolate the alpha bytes, then narrow 32b -> 16b -> 8b.
      const __m128i packed16 =
          _mm_packs_epi32(_mm_and_si128(px_lo, keep_low_byte),
                          _mm_and_si128(px_hi, keep_low_byte));
      const __m128i packed8 = _mm_packus_epi16(packed16, packed16);
      // Write the 8 alpha bytes and fold them into the accumulator.
      _mm_storel_epi64((__m128i*)(alpha + x), packed8);
      vector_and = _mm_and_si128(vector_and, packed8);
      x += 8;
    }
    // Remaining pixels of the row (always at least the last one).
    while (x < width) {
      const uint8_t value = argb[4 * x];
      alpha[x] = value;
      scalar_and &= value;
      ++x;
    }
    argb += argb_stride;
    alpha += alpha_stride;
  }
  // One mask bit per byte lane; a lane's bit is set only if its accumulated
  // value still matches the reference. Merging with the scalar accumulator
  // leaves 0xff only when every extracted alpha was 0xff.
  lanes = _mm_movemask_epi8(_mm_cmpeq_epi8(vector_and, opaque_ref));
  return ((scalar_and & lanes) == 0xff);
}
65
66#endif   // WEBP_USE_SSE2
67
68//------------------------------------------------------------------------------
69// Init function
70
// Prototype kept next to the definition so the entry point has a visible
// declaration in this translation unit.
extern void WebPInitAlphaProcessingSSE2(void);

// Points the WebPExtractAlpha hook at the SSE2 implementation defined in this
// file. When WEBP_USE_SSE2 is not defined the function body is empty and the
// hook is left untouched.
void WebPInitAlphaProcessingSSE2(void) {
#ifdef WEBP_USE_SSE2
  WebPExtractAlpha = ExtractAlpha;
#endif  // WEBP_USE_SSE2
}
78