// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// YUV->RGB conversion functions
//
// Author: Skal (pascal.massimino@gmail.com)
139aea642eefa7a641ab8b89d953251939221d2719Eric Hassold
14a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./yuv.h"
159aea642eefa7a641ab8b89d953251939221d2719Eric Hassold
16fa39824bb690c5806358871f46940d0450973d8aJames Zern#include <stdlib.h>
17fa39824bb690c5806358871f46940d0450973d8aJames Zern
188b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#if defined(WEBP_YUV_USE_TABLE)
199aea642eefa7a641ab8b89d953251939221d2719Eric Hassold
209aea642eefa7a641ab8b89d953251939221d2719Eric Hassoldstatic int done = 0;
219aea642eefa7a641ab8b89d953251939221d2719Eric Hassold
22a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE uint8_t clip(int v, int max_value) {
23466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora  return v < 0 ? 0 : v > max_value ? max_value : v;
24466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora}
25466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora
268b720228d581a84fd173b6dcb2fa295b59db489aVikas Aroraint16_t VP8kVToR[256], VP8kUToB[256];
278b720228d581a84fd173b6dcb2fa295b59db489aVikas Aroraint32_t VP8kVToG[256], VP8kUToG[256];
288b720228d581a84fd173b6dcb2fa295b59db489aVikas Arorauint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
298b720228d581a84fd173b6dcb2fa295b59db489aVikas Arorauint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
308b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
317c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {
329aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  int i;
339aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  if (done) {
349aea642eefa7a641ab8b89d953251939221d2719Eric Hassold    return;
359aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  }
361e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#ifndef USE_YUVj
379aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  for (i = 0; i < 256; ++i) {
389aea642eefa7a641ab8b89d953251939221d2719Eric Hassold    VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
399aea642eefa7a641ab8b89d953251939221d2719Eric Hassold    VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
409aea642eefa7a641ab8b89d953251939221d2719Eric Hassold    VP8kVToG[i] = -45773 * (i - 128);
419aea642eefa7a641ab8b89d953251939221d2719Eric Hassold    VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX;
429aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  }
439aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
449aea642eefa7a641ab8b89d953251939221d2719Eric Hassold    const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX;
45466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
46466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
479aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  }
481e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#else
491e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  for (i = 0; i < 256; ++i) {
501e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
511e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
521e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8kVToG[i] = -46802 * (i - 128);
531e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
541e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  }
551e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
561e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    const int k = i;
571e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
581e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
591e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  }
601e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#endif
611e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
629aea642eefa7a641ab8b89d953251939221d2719Eric Hassold  done = 1;
639aea642eefa7a641ab8b89d953251939221d2719Eric Hassold}
649aea642eefa7a641ab8b89d953251939221d2719Eric Hassold
650406ce1417f76f2034833414dcecc9f56253640cVikas Arora#else
660406ce1417f76f2034833414dcecc9f56253640cVikas Arora
677c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
680406ce1417f76f2034833414dcecc9f56253640cVikas Arora
690406ce1417f76f2034833414dcecc9f56253640cVikas Arora#endif  // WEBP_YUV_USE_TABLE
700406ce1417f76f2034833414dcecc9f56253640cVikas Arora
//-----------------------------------------------------------------------------
// Plain-C version
7333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora
// Generates a static row converter called FUNC_NAME.
// The generated function reads 'len' luma samples from 'y' and emits XSTEP
// output bytes per pixel into 'dst' by calling FUNC on each pixel.
// Each u/v sample is shared by two consecutive luma samples; when 'len' is
// odd, the trailing pixel uses the u/v sample following the last full pair.
#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP)                                       \
static void FUNC_NAME(const uint8_t* y,                                        \
                      const uint8_t* u, const uint8_t* v,                      \
                      uint8_t* dst, int len) {                                 \
  const uint8_t* const pairs_end = dst + (len & ~1) * XSTEP;                   \
  for (; dst != pairs_end; dst += 2 * XSTEP, y += 2, ++u, ++v) {               \
    FUNC(y[0], u[0], v[0], dst);                                               \
    FUNC(y[1], u[0], v[0], dst + XSTEP);                                       \
  }                                                                            \
  if ((len & 1) != 0) {                                                        \
    FUNC(y[0], u[0], v[0], dst);                                               \
  }                                                                            \
}

// One plain-C row converter per supported output layout.
ROW_FUNC(YuvToRgbRow, VP8YuvToRgb, 3)
ROW_FUNC(YuvToBgrRow, VP8YuvToBgr, 3)
ROW_FUNC(YuvToRgbaRow, VP8YuvToRgba, 4)
ROW_FUNC(YuvToBgraRow, VP8YuvToBgra, 4)
ROW_FUNC(YuvToArgbRow, VP8YuvToArgb, 4)
ROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2)
ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2)

#undef ROW_FUNC
10233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora
10333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora// Main call for processing a plane with a WebPSamplerRowFunc function:
10433f74dabbc7920a65ed435d7417987589febdc16Vikas Aroravoid WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
10533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora                             const uint8_t* u, const uint8_t* v, int uv_stride,
10633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora                             uint8_t* dst, int dst_stride,
10733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora                             int width, int height, WebPSamplerRowFunc func) {
10833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  int j;
10933f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  for (j = 0; j < height; ++j) {
11033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    func(y, u, v, dst, width);
11133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    y += y_stride;
11233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    if (j & 1) {
11333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora      u += uv_stride;
11433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora      v += uv_stride;
1158b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
11633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    dst += dst_stride;
1178b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
1188b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora}
1198b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
//-----------------------------------------------------------------------------
// Main call
12233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora
12333f74dabbc7920a65ed435d7417987589febdc16Vikas AroraWebPSamplerRowFunc WebPSamplers[MODE_LAST];
12433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora
12533f74dabbc7920a65ed435d7417987589febdc16Vikas Aroraextern void WebPInitSamplersSSE2(void);
12633f74dabbc7920a65ed435d7417987589febdc16Vikas Aroraextern void WebPInitSamplersMIPS32(void);
1277c8da7ce66017295a65ec028084b90800be377f8James Zernextern void WebPInitSamplersMIPSdspR2(void);
12833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora
1299e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zernstatic volatile VP8CPUInfo yuv_last_cpuinfo_used =
1309e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern    (VP8CPUInfo)&yuv_last_cpuinfo_used;
1319e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern
1327c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) {
1339e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern  if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
1349e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern
13533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_RGB]       = YuvToRgbRow;
13633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_RGBA]      = YuvToRgbaRow;
13733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_BGR]       = YuvToBgrRow;
13833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_BGRA]      = YuvToBgraRow;
13933f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_ARGB]      = YuvToArgbRow;
14033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row;
14133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_RGB_565]   = YuvToRgb565Row;
14233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_rgbA]      = YuvToRgbaRow;
14333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_bgrA]      = YuvToBgraRow;
14433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_Argb]      = YuvToArgbRow;
14533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row;
14633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora
14733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
14833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora  if (VP8GetCPUInfo != NULL) {
14933f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#if defined(WEBP_USE_SSE2)
15033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    if (VP8GetCPUInfo(kSSE2)) {
15133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora      WebPInitSamplersSSE2();
15233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    }
15333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#endif  // WEBP_USE_SSE2
15433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#if defined(WEBP_USE_MIPS32)
15533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    if (VP8GetCPUInfo(kMIPS32)) {
15633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora      WebPInitSamplersMIPS32();
15733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora    }
15833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#endif  // WEBP_USE_MIPS32
1597c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_MIPS_DSP_R2)
1607c8da7ce66017295a65ec028084b90800be377f8James Zern    if (VP8GetCPUInfo(kMIPSdspR2)) {
1617c8da7ce66017295a65ec028084b90800be377f8James Zern      WebPInitSamplersMIPSdspR2();
1627c8da7ce66017295a65ec028084b90800be377f8James Zern    }
1637c8da7ce66017295a65ec028084b90800be377f8James Zern#endif  // WEBP_USE_MIPS_DSP_R2
1648b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
1659e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern  yuv_last_cpuinfo_used = VP8GetCPUInfo;
1668b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora}
1678b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
//-----------------------------------------------------------------------------
// ARGB -> YUV converters
1707c8da7ce66017295a65ec028084b90800be377f8James Zern
1717c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
1727c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
1737c8da7ce66017295a65ec028084b90800be377f8James Zern  for (i = 0; i < width; ++i) {
1747c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint32_t p = argb[i];
1757c8da7ce66017295a65ec028084b90800be377f8James Zern    y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >>  0) & 0xff,
1767c8da7ce66017295a65ec028084b90800be377f8James Zern                     YUV_HALF);
1777c8da7ce66017295a65ec028084b90800be377f8James Zern  }
1787c8da7ce66017295a65ec028084b90800be377f8James Zern}
1797c8da7ce66017295a65ec028084b90800be377f8James Zern
1807c8da7ce66017295a65ec028084b90800be377f8James Zernvoid WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
1817c8da7ce66017295a65ec028084b90800be377f8James Zern                           int src_width, int do_store) {
1827c8da7ce66017295a65ec028084b90800be377f8James Zern  // No rounding. Last pixel is dealt with separately.
1837c8da7ce66017295a65ec028084b90800be377f8James Zern  const int uv_width = src_width >> 1;
1847c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
1857c8da7ce66017295a65ec028084b90800be377f8James Zern  for (i = 0; i < uv_width; ++i) {
1867c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint32_t v0 = argb[2 * i + 0];
1877c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint32_t v1 = argb[2 * i + 1];
1887c8da7ce66017295a65ec028084b90800be377f8James Zern    // VP8RGBToU/V expects four accumulated pixels. Hence we need to
1897c8da7ce66017295a65ec028084b90800be377f8James Zern    // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less.
1907c8da7ce66017295a65ec028084b90800be377f8James Zern    const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
1917c8da7ce66017295a65ec028084b90800be377f8James Zern    const int g = ((v0 >>  7) & 0x1fe) + ((v1 >>  7) & 0x1fe);
1927c8da7ce66017295a65ec028084b90800be377f8James Zern    const int b = ((v0 <<  1) & 0x1fe) + ((v1 <<  1) & 0x1fe);
1937c8da7ce66017295a65ec028084b90800be377f8James Zern    const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
1947c8da7ce66017295a65ec028084b90800be377f8James Zern    const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
1957c8da7ce66017295a65ec028084b90800be377f8James Zern    if (do_store) {
1967c8da7ce66017295a65ec028084b90800be377f8James Zern      u[i] = tmp_u;
1977c8da7ce66017295a65ec028084b90800be377f8James Zern      v[i] = tmp_v;
1987c8da7ce66017295a65ec028084b90800be377f8James Zern    } else {
1997c8da7ce66017295a65ec028084b90800be377f8James Zern      // Approximated average-of-four. But it's an acceptable diff.
2007c8da7ce66017295a65ec028084b90800be377f8James Zern      u[i] = (u[i] + tmp_u + 1) >> 1;
2017c8da7ce66017295a65ec028084b90800be377f8James Zern      v[i] = (v[i] + tmp_v + 1) >> 1;
2027c8da7ce66017295a65ec028084b90800be377f8James Zern    }
2037c8da7ce66017295a65ec028084b90800be377f8James Zern  }
2047c8da7ce66017295a65ec028084b90800be377f8James Zern  if (src_width & 1) {       // last pixel
2057c8da7ce66017295a65ec028084b90800be377f8James Zern    const uint32_t v0 = argb[2 * i + 0];
2067c8da7ce66017295a65ec028084b90800be377f8James Zern    const int r = (v0 >> 14) & 0x3fc;
2077c8da7ce66017295a65ec028084b90800be377f8James Zern    const int g = (v0 >>  6) & 0x3fc;
2087c8da7ce66017295a65ec028084b90800be377f8James Zern    const int b = (v0 <<  2) & 0x3fc;
2097c8da7ce66017295a65ec028084b90800be377f8James Zern    const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
2107c8da7ce66017295a65ec028084b90800be377f8James Zern    const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
2117c8da7ce66017295a65ec028084b90800be377f8James Zern    if (do_store) {
2127c8da7ce66017295a65ec028084b90800be377f8James Zern      u[i] = tmp_u;
2137c8da7ce66017295a65ec028084b90800be377f8James Zern      v[i] = tmp_v;
2147c8da7ce66017295a65ec028084b90800be377f8James Zern    } else {
2157c8da7ce66017295a65ec028084b90800be377f8James Zern      u[i] = (u[i] + tmp_u + 1) >> 1;
2167c8da7ce66017295a65ec028084b90800be377f8James Zern      v[i] = (v[i] + tmp_v + 1) >> 1;
2177c8da7ce66017295a65ec028084b90800be377f8James Zern    }
2187c8da7ce66017295a65ec028084b90800be377f8James Zern  }
2197c8da7ce66017295a65ec028084b90800be377f8James Zern}
2207c8da7ce66017295a65ec028084b90800be377f8James Zern
2217c8da7ce66017295a65ec028084b90800be377f8James Zern//-----------------------------------------------------------------------------
2227c8da7ce66017295a65ec028084b90800be377f8James Zern
2237c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
2247c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
2257c8da7ce66017295a65ec028084b90800be377f8James Zern  for (i = 0; i < width; ++i, rgb += 3) {
2267c8da7ce66017295a65ec028084b90800be377f8James Zern    y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
2277c8da7ce66017295a65ec028084b90800be377f8James Zern  }
2287c8da7ce66017295a65ec028084b90800be377f8James Zern}
2297c8da7ce66017295a65ec028084b90800be377f8James Zern
2307c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
2317c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
2327c8da7ce66017295a65ec028084b90800be377f8James Zern  for (i = 0; i < width; ++i, bgr += 3) {
2337c8da7ce66017295a65ec028084b90800be377f8James Zern    y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
2347c8da7ce66017295a65ec028084b90800be377f8James Zern  }
2357c8da7ce66017295a65ec028084b90800be377f8James Zern}
2367c8da7ce66017295a65ec028084b90800be377f8James Zern
2377c8da7ce66017295a65ec028084b90800be377f8James Zernvoid WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
2387c8da7ce66017295a65ec028084b90800be377f8James Zern                             uint8_t* u, uint8_t* v, int width) {
2397c8da7ce66017295a65ec028084b90800be377f8James Zern  int i;
2407c8da7ce66017295a65ec028084b90800be377f8James Zern  for (i = 0; i < width; i += 1, rgb += 4) {
2417c8da7ce66017295a65ec028084b90800be377f8James Zern    const int r = rgb[0], g = rgb[1], b = rgb[2];
2427c8da7ce66017295a65ec028084b90800be377f8James Zern    u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2);
2437c8da7ce66017295a65ec028084b90800be377f8James Zern    v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2);
2447c8da7ce66017295a65ec028084b90800be377f8James Zern  }
2457c8da7ce66017295a65ec028084b90800be377f8James Zern}
2467c8da7ce66017295a65ec028084b90800be377f8James Zern
2477c8da7ce66017295a65ec028084b90800be377f8James Zern//-----------------------------------------------------------------------------
2487c8da7ce66017295a65ec028084b90800be377f8James Zern
#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic

// Clamps 'v' to the closed interval [0, MAX_Y].
static uint16_t clip_y(int v) {
  if (v < 0) {
    return 0;
  }
  if (v > MAX_Y) {
    return MAX_Y;
  }
  return (uint16_t)v;
}
253fa39824bb690c5806358871f46940d0450973d8aJames Zern
// Adds the per-sample error (ref[i] - src[i]) to dst[i], clamping the result
// with clip_y(), for 'len' samples.
// Returns the sum of the absolute errors over the whole run.
static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
                                  uint16_t* dst, int len) {
  uint64_t total_error = 0;
  int n = 0;
  while (n < len) {
    const int error = ref[n] - src[n];
    dst[n] = clip_y((int)dst[n] + error);
    total_error += (uint64_t)abs(error);
    ++n;
  }
  return total_error;
}
266fa39824bb690c5806358871f46940d0450973d8aJames Zern
// Adds the per-sample error (ref[i] - src[i]) to dst[i] for 'len' samples.
// No clamping is applied.
static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
                                int16_t* dst, int len) {
  int n = 0;
  while (n < len) {
    const int error = ref[n] - src[n];
    dst[n] = (int16_t)(dst[n] + error);
    ++n;
  }
}
275fa39824bb690c5806358871f46940d0450973d8aJames Zern
// For each of the 'len' positions i, blends the 2x2 neighbourhood
// A[i], A[i+1], B[i], B[i+1] with weights 9/3/3/1 (sum 16, rounded) in two
// mirrored ways, adds the results to best_y[2*i] and best_y[2*i+1], and
// stores the clamped sums in out[2*i] and out[2*i+1].
// A and B are read up to index 'len'; best_y and out hold 2 * len entries.
static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
                                const uint16_t* best_y, uint16_t* out) {
  int i;
  for (i = 0; i < len; ++i) {
    const int a0 = A[i];
    const int a1 = A[i + 1];
    const int b0 = B[i];
    const int b1 = B[i + 1];
    // Left output favours (a0, b0); right output favours (a1, b1).
    const int left  = (a0 * 9 + a1 * 3 + b0 * 3 + b1 + 8) >> 4;
    const int right = (a1 * 9 + a0 * 3 + b1 * 3 + b0 + 8) >> 4;
    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + left);
    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + right);
  }
}
286fa39824bb690c5806358871f46940d0450973d8aJames Zern
287fa39824bb690c5806358871f46940d0450973d8aJames Zern#undef MAX_Y
288fa39824bb690c5806358871f46940d0450973d8aJames Zern
289fa39824bb690c5806358871f46940d0450973d8aJames Zern//-----------------------------------------------------------------------------
290fa39824bb690c5806358871f46940d0450973d8aJames Zern
2917c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
2927c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
2937c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
2947c8da7ce66017295a65ec028084b90800be377f8James Zern                              uint8_t* u, uint8_t* v, int width);
2957c8da7ce66017295a65ec028084b90800be377f8James Zern
2967c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
2977c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
2987c8da7ce66017295a65ec028084b90800be377f8James Zern                            int src_width, int do_store);
2997c8da7ce66017295a65ec028084b90800be377f8James Zern
300fa39824bb690c5806358871f46940d0450973d8aJames Zernuint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
301fa39824bb690c5806358871f46940d0450973d8aJames Zern                                uint16_t* dst, int len);
302fa39824bb690c5806358871f46940d0450973d8aJames Zernvoid (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
303fa39824bb690c5806358871f46940d0450973d8aJames Zern                              int16_t* dst, int len);
304fa39824bb690c5806358871f46940d0450973d8aJames Zernvoid (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
305fa39824bb690c5806358871f46940d0450973d8aJames Zern                              const uint16_t* best_y, uint16_t* out);
306fa39824bb690c5806358871f46940d0450973d8aJames Zern
3077c8da7ce66017295a65ec028084b90800be377f8James Zernstatic volatile VP8CPUInfo rgba_to_yuv_last_cpuinfo_used =
3087c8da7ce66017295a65ec028084b90800be377f8James Zern    (VP8CPUInfo)&rgba_to_yuv_last_cpuinfo_used;
3097c8da7ce66017295a65ec028084b90800be377f8James Zern
3107c8da7ce66017295a65ec028084b90800be377f8James Zernextern void WebPInitConvertARGBToYUVSSE2(void);
311fa39824bb690c5806358871f46940d0450973d8aJames Zernextern void WebPInitSharpYUVSSE2(void);
3127c8da7ce66017295a65ec028084b90800be377f8James Zern
3137c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
3147c8da7ce66017295a65ec028084b90800be377f8James Zern  if (rgba_to_yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
3157c8da7ce66017295a65ec028084b90800be377f8James Zern
3167c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPConvertARGBToY = ConvertARGBToY;
3177c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPConvertARGBToUV = WebPConvertARGBToUV_C;
3187c8da7ce66017295a65ec028084b90800be377f8James Zern
3197c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPConvertRGB24ToY = ConvertRGB24ToY;
3207c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPConvertBGR24ToY = ConvertBGR24ToY;
3217c8da7ce66017295a65ec028084b90800be377f8James Zern
3227c8da7ce66017295a65ec028084b90800be377f8James Zern  WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
3237c8da7ce66017295a65ec028084b90800be377f8James Zern
324fa39824bb690c5806358871f46940d0450973d8aJames Zern  WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
325fa39824bb690c5806358871f46940d0450973d8aJames Zern  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
326fa39824bb690c5806358871f46940d0450973d8aJames Zern  WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
327fa39824bb690c5806358871f46940d0450973d8aJames Zern
3287c8da7ce66017295a65ec028084b90800be377f8James Zern  if (VP8GetCPUInfo != NULL) {
3297c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_SSE2)
3307c8da7ce66017295a65ec028084b90800be377f8James Zern    if (VP8GetCPUInfo(kSSE2)) {
3317c8da7ce66017295a65ec028084b90800be377f8James Zern      WebPInitConvertARGBToYUVSSE2();
332fa39824bb690c5806358871f46940d0450973d8aJames Zern      WebPInitSharpYUVSSE2();
3337c8da7ce66017295a65ec028084b90800be377f8James Zern    }
3347c8da7ce66017295a65ec028084b90800be377f8James Zern#endif  // WEBP_USE_SSE2
3357c8da7ce66017295a65ec028084b90800be377f8James Zern  }
3367c8da7ce66017295a65ec028084b90800be377f8James Zern  rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo;
3377c8da7ce66017295a65ec028084b90800be377f8James Zern}
338