1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// Utilities for processing transparent channel.
11//
12// Author: Skal (pascal.massimino@gmail.com)
13
14#include <assert.h>
15#include "./dsp.h"
16
// Tables can be faster on some platforms but incur some extra binary size (~2k).
18// #define USE_TABLES_FOR_ALPHA_MULT
19
20// -----------------------------------------------------------------------------
21
#define MFIX 24    // 24bit fixed-point arithmetic
#define HALF ((1u << MFIX) >> 1)          // 0.5 in fixed-point, for rounding
#define KINV_255 ((1u << MFIX) / 255u)    // 1/255 in fixed-point (truncated)

// Scales the 8-bit sample 'x' by the MFIX-bit fixed-point factor 'mult' with
// round-to-nearest, and returns the result saturated to [0, 255].
//
// The product is formed in 64 bits: with an inverse scale ((255 << MFIX) / a)
// and a sample larger than its alpha, 'x * mult' no longer fits in 32 bits.
// A 32-bit product would silently wrap and yield an unrelated small value;
// here such out-of-range results are clamped to 255 instead.
static uint32_t Mult(uint8_t x, uint32_t mult) {
  const uint64_t scaled = ((uint64_t)x * mult + HALF) >> MFIX;
  return (scaled > 255u) ? 255u : (uint32_t)scaled;
}
31
32#ifdef USE_TABLES_FOR_ALPHA_MULT
33
// Precomputed MFIX-bit fixed-point scale factors, indexed by alpha value.
// GetScale() reads kMultTables[!inverse][alpha], so:
//   row 0 is used when 'inverse' is set,
//   row 1 is used when 'inverse' is clear.
// Entry 0 of row 0 is a placeholder (the division by alpha is undefined
// there); the row functions in this file special-case alpha == 0 before
// looking up a scale.
static const uint32_t kMultTables[2][256] = {
  {    // row 0: (255u << MFIX) / alpha
    0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
    0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
    0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
    0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
    0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
    0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
    0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
    0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
    0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
    0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
    0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
    0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
    0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
    0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
    0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
    0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
    0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
    0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
    0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
    0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
    0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
    0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
    0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
    0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
    0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
    0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
    0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
    0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
    0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
    0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
    0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
    0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
    0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
    0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
    0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
    0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
    0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
    0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
    0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
    0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
    0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
    0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
    0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
  {   // row 1: alpha * KINV_255
    0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
    0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
    0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
    0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
    0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
    0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
    0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
    0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
    0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
    0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
    0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
    0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
    0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
    0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
    0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
    0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
    0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
    0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
    0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
    0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
    0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
    0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
    0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
    0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
    0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
    0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
    0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
    0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
    0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
    0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
    0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
    0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
    0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
    0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
    0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
    0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
    0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
    0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
    0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
    0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
    0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
    0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
    0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
};
124
125static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
126  return kMultTables[!inverse][a];
127}
128
129#else
130
131static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
132  return inverse ? (255u << MFIX) / a : a * KINV_255;
133}
134
135#endif    // USE_TABLES_FOR_ALPHA_MULT
136
// Scales, in place, the three low bytes of each of the 'width' 32-bit pixels
// in 'ptr' by a factor derived from the pixel's own top byte (its alpha),
// as returned by GetScale(alpha, inverse).
// Pixels with alpha == 255 are left untouched; pixels with alpha == 0 are
// cleared to 0. The alpha byte itself is preserved otherwise.
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t pixel = ptr[i];
    const uint32_t a = pixel >> 24;
    if (a == 0xff) {
      continue;               // fully opaque: nothing to scale
    }
    if (a == 0) {
      ptr[i] = 0;             // fully transparent: clear all channels
      continue;
    }
    {
      const uint32_t scale = GetScale(a, inverse);
      const uint32_t c0 = Mult((uint8_t)(pixel >>  0), scale);
      const uint32_t c1 = Mult((uint8_t)(pixel >>  8), scale);
      const uint32_t c2 = Mult((uint8_t)(pixel >> 16), scale);
      ptr[i] = (pixel & 0xff000000u) | (c2 << 16) | (c1 << 8) | c0;
    }
  }
}
156
// Scales, in place, each of the 'width' samples in 'ptr' by a factor derived
// from the matching entry of 'alpha', as returned by GetScale(a, inverse).
// Samples whose alpha is 255 are left untouched; samples whose alpha is 0
// are set to 0.
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
                  int width, int inverse) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t a = alpha[i];
    if (a == 255) {
      continue;
    }
    if (a == 0) {
      ptr[i] = 0;
    } else {
      ptr[i] = Mult(ptr[i], GetScale(a, inverse));
    }
  }
}
172
173#undef KINV_255
174#undef HALF
175#undef MFIX
176
// Dispatch pointers for the per-row multipliers. They have no initializer
// here (so they start out NULL); WebPInitAlphaProcessing() points them at
// WebPMultARGBRowC and WebPMultRowC respectively, so it must run before
// WebPMultARGBRows() / WebPMultRows() are used.
void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
                    int width, int inverse);
180
181//------------------------------------------------------------------------------
182// Generic per-plane calls
183
// Applies WebPMultARGBRow() to 'num_rows' consecutive rows of 'width' 32-bit
// pixels. 'ptr' addresses the first row and 'stride' is the distance between
// row starts, in bytes. Does nothing if 'num_rows' is not positive.
void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
                      int inverse) {
  uint8_t* row = ptr;
  int rows_left;
  for (rows_left = num_rows; rows_left > 0; --rows_left) {
    WebPMultARGBRow((uint32_t*)row, width, inverse);
    row += stride;
  }
}
192
// Applies WebPMultRow() to 'num_rows' consecutive rows of 'width' samples.
// 'ptr' / 'stride' describe the sample plane and 'alpha' / 'alpha_stride' the
// alpha plane; both strides are in bytes. Does nothing if 'num_rows' is not
// positive.
void WebPMultRows(uint8_t* ptr, int stride,
                  const uint8_t* alpha, int alpha_stride,
                  int width, int num_rows, int inverse) {
  uint8_t* sample_row = ptr;
  const uint8_t* alpha_row = alpha;
  int rows_left;
  for (rows_left = num_rows; rows_left > 0; --rows_left) {
    WebPMultRow(sample_row, alpha_row, width, inverse);
    sample_row += stride;
    alpha_row += alpha_stride;
  }
}
203
204//------------------------------------------------------------------------------
205// Premultiplied modes
206
// non-dithered modes
208
// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
#if 1     // (int)(x * a / 255.)
#define MULTIPLIER(a)   ((a) * 32897U)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
#else     // (int)(x * a / 255. + .5)
#define MULTIPLIER(a) ((a) * 65793U)
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
#endif

// Premultiplies, in place, the three color bytes of every 4-byte pixel by the
// pixel's alpha byte. 'rgba' holds 'h' rows of 'w' pixels, 'stride' bytes
// apart. If 'alpha_first' is non-zero the alpha is byte 0 and the colors are
// bytes 1..3; otherwise the colors are bytes 0..2 and the alpha is byte 3.
// Pixels whose alpha is 0xff are skipped; the alpha byte is never modified.
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
                               int w, int h, int stride) {
  const int alpha_pos = alpha_first ? 0 : 3;
  const int color_pos = alpha_first ? 1 : 0;
  int y;
  for (y = 0; y < h; ++y) {
    int x;
    for (x = 0; x < w; ++x) {
      uint8_t* const pixel = rgba + 4 * x;
      const uint32_t a = pixel[alpha_pos];
      if (a != 0xff) {
        const uint32_t mult = MULTIPLIER(a);
        uint8_t* const color = pixel + color_pos;
        int c;
        for (c = 0; c < 3; ++c) {
          color[c] = PREMULTIPLY(color[c], mult);
        }
      }
    }
    rgba += stride;
  }
}
#undef MULTIPLIER
#undef PREMULTIPLY
240
241// rgbA4444
242
243#define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
244
245static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
246  return (x & 0xf0) | (x >> 4);
247}
248
249static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
250  return (x & 0x0f) | (x << 4);
251}
252
253static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
254  return (x * m) >> 16;
255}
256
257static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
258                                               int w, int h, int stride,
259                                               int rg_byte_pos /* 0 or 1 */) {
260  while (h-- > 0) {
261    int i;
262    for (i = 0; i < w; ++i) {
263      const uint32_t rg = rgba4444[2 * i + rg_byte_pos];
264      const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)];
265      const uint8_t a = ba & 0x0f;
266      const uint32_t mult = MULTIPLIER(a);
267      const uint8_t r = multiply(dither_hi(rg), mult);
268      const uint8_t g = multiply(dither_lo(rg), mult);
269      const uint8_t b = multiply(dither_hi(ba), mult);
270      rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f);
271      rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a;
272    }
273    rgba4444 += stride;
274  }
275}
276#undef MULTIPLIER
277
// Premultiplies a 4444 buffer via ApplyAlphaMultiply4444(). The byte holding
// the R/G nibbles is byte 1 of each pixel when WEBP_SWAP_16BIT_CSP is
// defined, and byte 0 otherwise.
static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
                                   int w, int h, int stride) {
#ifdef WEBP_SWAP_16BIT_CSP
  const int rg_byte_pos = 1;
#else
  const int rg_byte_pos = 0;
#endif
  ApplyAlphaMultiply4444(rgba4444, w, h, stride, rg_byte_pos);
}
286
// Copies a 'width' x 'height' plane of alpha bytes into every 4th byte of
// 'dst' (dst[4 * i] for column i). 'alpha_stride' and 'dst_stride' are the
// byte distances between consecutive rows of the source and destination.
// Returns non-zero if at least one copied value differs from 0xff, and 0
// otherwise (including when nothing is copied).
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
                         int width, int height,
                         uint8_t* dst, int dst_stride) {
  const uint8_t* src_row = alpha;
  uint8_t* dst_row = dst;
  uint32_t and_of_all = 0xff;   // stays 0xff only if every value is 0xff
  int rows_left = height;

  while (rows_left-- > 0) {
    int x;
    for (x = 0; x < width; ++x) {
      const uint32_t value = src_row[x];
      dst_row[4 * x] = value;
      and_of_all &= value;
    }
    src_row += alpha_stride;
    dst_row += dst_stride;
  }

  return (and_of_all != 0xff);
}
305
// Writes a 'width' x 'height' plane of alpha bytes into 32-bit pixels as
// (alpha << 8), i.e. into bits 8..15 with every other bit cleared.
// 'alpha_stride' is in bytes; 'dst_stride' is in uint32_t elements.
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
                                 int width, int height,
                                 uint32_t* dst, int dst_stride) {
  const uint8_t* src_row = alpha;
  uint32_t* dst_row = dst;
  int rows_left = height;
  while (rows_left-- > 0) {
    int x;
    for (x = 0; x < width; ++x) {
      const uint32_t value = src_row[x];
      dst_row[x] = value << 8;  // the A/R/B channels stay zero
    }
    src_row += alpha_stride;
    dst_row += dst_stride;
  }
}
318
// Copies byte 0 of every 4-byte pixel of a 'width' x 'height' area of 'argb'
// into the 'alpha' plane. 'argb_stride' and 'alpha_stride' are the byte
// distances between consecutive rows of the source and destination.
// Returns non-zero if every copied value equals 0xff (also when nothing is
// copied), and 0 otherwise.
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
                        int width, int height,
                        uint8_t* alpha, int alpha_stride) {
  const uint8_t* src_row = argb;
  uint8_t* dst_row = alpha;
  uint8_t and_of_all = 0xff;   // stays 0xff only if every value is 0xff
  int rows_left = height;

  while (rows_left-- > 0) {
    int x;
    for (x = 0; x < width; ++x) {
      const uint8_t value = src_row[4 * x];
      dst_row[x] = value;
      and_of_all &= value;
    }
    src_row += argb_stride;
    dst_row += alpha_stride;
  }
  return (and_of_all == 0xff);
}
336
// Dispatch pointers for the alpha helpers above. They have no initializer
// here (so they start out NULL); WebPInitAlphaProcessing() points them at
// ApplyAlphaMultiply, ApplyAlphaMultiply_16b, DispatchAlpha,
// DispatchAlphaToGreen and ExtractAlpha respectively, before giving the
// platform-specific initializers a chance to run.
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
342
343//------------------------------------------------------------------------------
344// Init function
345
// Platform-specific initializers, declared here and defined outside this
// file. WebPInitAlphaProcessing() calls each one only when the matching
// WEBP_USE_* macro is defined and VP8GetCPUInfo() reports the feature.
// NOTE(review): presumably they replace some of the dispatch pointers with
// optimized versions -- confirm against their definitions.
extern void WebPInitAlphaProcessingMIPSdspR2(void);
extern void WebPInitAlphaProcessingSSE2(void);
extern void WebPInitAlphaProcessingSSE41(void);
349
// Value of VP8GetCPUInfo seen by the last completed WebPInitAlphaProcessing()
// call. It is initialized to its own address (cast to VP8CPUInfo) so that the
// first call never finds it equal to VP8GetCPUInfo -- not even when
// VP8GetCPUInfo is NULL -- and therefore always performs the initialization.
static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
    (VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
352
// Sets up the alpha-processing dispatch pointers: first to the plain-C
// implementations from this file, then (if VP8GetCPUInfo is non-NULL) lets
// the compiled-in platform-specific initializers run for the CPU features
// that VP8GetCPUInfo() reports.
// Returns immediately when VP8GetCPUInfo is unchanged since the last
// completed call, so calling it repeatedly is cheap.
// NOTE(review): WEBP_TSAN_IGNORE_FUNCTION presumably excludes this function
// from thread-sanitizer instrumentation -- confirm against its definition.
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
  // Already initialized for the current VP8GetCPUInfo: nothing to do.
  if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;

  // Portable C defaults.
  WebPMultARGBRow = WebPMultARGBRowC;
  WebPMultRow = WebPMultRowC;
  WebPApplyAlphaMultiply = ApplyAlphaMultiply;
  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
  WebPDispatchAlpha = DispatchAlpha;
  WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
  WebPExtractAlpha = ExtractAlpha;

  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
  if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
    if (VP8GetCPUInfo(kSSE2)) {
      WebPInitAlphaProcessingSSE2();
      // The SSE4.1 initializer runs after (and only together with) the SSE2
      // one.
#if defined(WEBP_USE_SSE41)
      if (VP8GetCPUInfo(kSSE4_1)) {
        WebPInitAlphaProcessingSSE41();
      }
#endif
    }
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
    if (VP8GetCPUInfo(kMIPSdspR2)) {
      WebPInitAlphaProcessingMIPSdspR2();
    }
#endif
  }
  // Remember which VP8GetCPUInfo this setup was done for (see the early
  // return at the top).
  alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
}
384