1a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Copyright 2010 Google Inc. All Rights Reserved. 29aea642eefa7a641ab8b89d953251939221d2719Eric Hassold// 30406ce1417f76f2034833414dcecc9f56253640cVikas Arora// Use of this source code is governed by a BSD-style license 40406ce1417f76f2034833414dcecc9f56253640cVikas Arora// that can be found in the COPYING file in the root of the source 50406ce1417f76f2034833414dcecc9f56253640cVikas Arora// tree. An additional intellectual property rights grant can be found 60406ce1417f76f2034833414dcecc9f56253640cVikas Arora// in the file PATENTS. All contributing project authors may 70406ce1417f76f2034833414dcecc9f56253640cVikas Arora// be found in the AUTHORS file in the root of the source tree. 89aea642eefa7a641ab8b89d953251939221d2719Eric Hassold// ----------------------------------------------------------------------------- 99aea642eefa7a641ab8b89d953251939221d2719Eric Hassold// 1033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora// YUV->RGB conversion functions 119aea642eefa7a641ab8b89d953251939221d2719Eric Hassold// 129aea642eefa7a641ab8b89d953251939221d2719Eric Hassold// Author: Skal (pascal.massimino@gmail.com) 139aea642eefa7a641ab8b89d953251939221d2719Eric Hassold 14a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./yuv.h" 159aea642eefa7a641ab8b89d953251939221d2719Eric Hassold 16fa39824bb690c5806358871f46940d0450973d8aJames Zern#include <stdlib.h> 17fa39824bb690c5806358871f46940d0450973d8aJames Zern 188b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#if defined(WEBP_YUV_USE_TABLE) 199aea642eefa7a641ab8b89d953251939221d2719Eric Hassold 209aea642eefa7a641ab8b89d953251939221d2719Eric Hassoldstatic int done = 0; 219aea642eefa7a641ab8b89d953251939221d2719Eric Hassold 22a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE uint8_t clip(int v, int max_value) { 23466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora return v < 0 ? 0 : v > max_value ? 
max_value : v; 24466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora} 25466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora 268b720228d581a84fd173b6dcb2fa295b59db489aVikas Aroraint16_t VP8kVToR[256], VP8kUToB[256]; 278b720228d581a84fd173b6dcb2fa295b59db489aVikas Aroraint32_t VP8kVToG[256], VP8kUToG[256]; 288b720228d581a84fd173b6dcb2fa295b59db489aVikas Arorauint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN]; 298b720228d581a84fd173b6dcb2fa295b59db489aVikas Arorauint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN]; 308b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora 317c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) { 329aea642eefa7a641ab8b89d953251939221d2719Eric Hassold int i; 339aea642eefa7a641ab8b89d953251939221d2719Eric Hassold if (done) { 349aea642eefa7a641ab8b89d953251939221d2719Eric Hassold return; 359aea642eefa7a641ab8b89d953251939221d2719Eric Hassold } 361e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#ifndef USE_YUVj 379aea642eefa7a641ab8b89d953251939221d2719Eric Hassold for (i = 0; i < 256; ++i) { 389aea642eefa7a641ab8b89d953251939221d2719Eric Hassold VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX; 399aea642eefa7a641ab8b89d953251939221d2719Eric Hassold VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF; 409aea642eefa7a641ab8b89d953251939221d2719Eric Hassold VP8kVToG[i] = -45773 * (i - 128); 419aea642eefa7a641ab8b89d953251939221d2719Eric Hassold VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX; 429aea642eefa7a641ab8b89d953251939221d2719Eric Hassold } 439aea642eefa7a641ab8b89d953251939221d2719Eric Hassold for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) { 449aea642eefa7a641ab8b89d953251939221d2719Eric Hassold const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX; 45466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255); 46466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15); 
479aea642eefa7a641ab8b89d953251939221d2719Eric Hassold } 481e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#else 491e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora for (i = 0; i < 256; ++i) { 501e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX; 511e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF; 521e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora VP8kVToG[i] = -46802 * (i - 128); 531e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX; 541e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora } 551e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) { 561e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora const int k = i; 571e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255); 581e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15); 591e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora } 601e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#endif 611e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora 629aea642eefa7a641ab8b89d953251939221d2719Eric Hassold done = 1; 639aea642eefa7a641ab8b89d953251939221d2719Eric Hassold} 649aea642eefa7a641ab8b89d953251939221d2719Eric Hassold 650406ce1417f76f2034833414dcecc9f56253640cVikas Arora#else 660406ce1417f76f2034833414dcecc9f56253640cVikas Arora 677c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {} 680406ce1417f76f2034833414dcecc9f56253640cVikas Arora 690406ce1417f76f2034833414dcecc9f56253640cVikas Arora#endif // WEBP_YUV_USE_TABLE 700406ce1417f76f2034833414dcecc9f56253640cVikas Arora 718b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora//----------------------------------------------------------------------------- 7233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora// Plain-C 
//-----------------------------------------------------------------------------
// Plain-C version

// Defines a static row-conversion function named FUNC_NAME.
//   FUNC   per-pixel converter, invoked as FUNC(y, u, v, dst).
//   XSTEP  number of bytes 'dst' advances per output pixel.
// Pixels are processed in pairs: both pixels of a pair use the same u[0]/v[0]
// sample, and 'u'/'v' advance by one for every two 'y' samples. If 'len' is
// odd, the last pixel is converted on its own with the next u/v sample.
#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP)                                       \
static void FUNC_NAME(const uint8_t* y,                                        \
                      const uint8_t* u, const uint8_t* v,                      \
                      uint8_t* dst, int len) {                                 \
  const uint8_t* const end = dst + (len & ~1) * (XSTEP);                       \
  while (dst != end) {                                                         \
    FUNC(y[0], u[0], v[0], dst);                                               \
    FUNC(y[1], u[0], v[0], dst + (XSTEP));                                     \
    y += 2;                                                                    \
    ++u;                                                                       \
    ++v;                                                                       \
    dst += 2 * (XSTEP);                                                        \
  }                                                                            \
  if (len & 1) {                                                               \
    FUNC(y[0], u[0], v[0], dst);                                               \
  }                                                                            \
}

// All variants implemented.
9333f74dabbc7920a65ed435d7417987589febdc16Vikas AroraROW_FUNC(YuvToRgbRow, VP8YuvToRgb, 3) 9433f74dabbc7920a65ed435d7417987589febdc16Vikas AroraROW_FUNC(YuvToBgrRow, VP8YuvToBgr, 3) 9533f74dabbc7920a65ed435d7417987589febdc16Vikas AroraROW_FUNC(YuvToRgbaRow, VP8YuvToRgba, 4) 9633f74dabbc7920a65ed435d7417987589febdc16Vikas AroraROW_FUNC(YuvToBgraRow, VP8YuvToBgra, 4) 9733f74dabbc7920a65ed435d7417987589febdc16Vikas AroraROW_FUNC(YuvToArgbRow, VP8YuvToArgb, 4) 9833f74dabbc7920a65ed435d7417987589febdc16Vikas AroraROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2) 9933f74dabbc7920a65ed435d7417987589febdc16Vikas AroraROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2) 10033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 10133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#undef ROW_FUNC 10233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 10333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora// Main call for processing a plane with a WebPSamplerRowFunc function: 10433f74dabbc7920a65ed435d7417987589febdc16Vikas Aroravoid WebPSamplerProcessPlane(const uint8_t* y, int y_stride, 10533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora const uint8_t* u, const uint8_t* v, int uv_stride, 10633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora uint8_t* dst, int dst_stride, 10733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora int width, int height, WebPSamplerRowFunc func) { 10833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora int j; 10933f74dabbc7920a65ed435d7417987589febdc16Vikas Arora for (j = 0; j < height; ++j) { 11033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora func(y, u, v, dst, width); 11133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora y += y_stride; 11233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora if (j & 1) { 11333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora u += uv_stride; 11433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora v += uv_stride; 1158b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora } 11633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 
dst += dst_stride; 1178b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora } 1188b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora} 1198b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora 12033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora//----------------------------------------------------------------------------- 12133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora// Main call 12233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 12333f74dabbc7920a65ed435d7417987589febdc16Vikas AroraWebPSamplerRowFunc WebPSamplers[MODE_LAST]; 12433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 12533f74dabbc7920a65ed435d7417987589febdc16Vikas Aroraextern void WebPInitSamplersSSE2(void); 12633f74dabbc7920a65ed435d7417987589febdc16Vikas Aroraextern void WebPInitSamplersMIPS32(void); 1277c8da7ce66017295a65ec028084b90800be377f8James Zernextern void WebPInitSamplersMIPSdspR2(void); 12833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 1299e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zernstatic volatile VP8CPUInfo yuv_last_cpuinfo_used = 1309e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern (VP8CPUInfo)&yuv_last_cpuinfo_used; 1319e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern 1327c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) { 1339e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return; 1349e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern 13533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_RGB] = YuvToRgbRow; 13633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_RGBA] = YuvToRgbaRow; 13733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_BGR] = YuvToBgrRow; 13833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_BGRA] = YuvToBgraRow; 13933f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_ARGB] = YuvToArgbRow; 14033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 
WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row; 14133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_RGB_565] = YuvToRgb565Row; 14233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_rgbA] = YuvToRgbaRow; 14333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_bgrA] = YuvToBgraRow; 14433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_Argb] = YuvToArgbRow; 14533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row; 14633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora 14733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora // If defined, use CPUInfo() to overwrite some pointers with faster versions. 14833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora if (VP8GetCPUInfo != NULL) { 14933f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#if defined(WEBP_USE_SSE2) 15033f74dabbc7920a65ed435d7417987589febdc16Vikas Arora if (VP8GetCPUInfo(kSSE2)) { 15133f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPInitSamplersSSE2(); 15233f74dabbc7920a65ed435d7417987589febdc16Vikas Arora } 15333f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#endif // WEBP_USE_SSE2 15433f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#if defined(WEBP_USE_MIPS32) 15533f74dabbc7920a65ed435d7417987589febdc16Vikas Arora if (VP8GetCPUInfo(kMIPS32)) { 15633f74dabbc7920a65ed435d7417987589febdc16Vikas Arora WebPInitSamplersMIPS32(); 15733f74dabbc7920a65ed435d7417987589febdc16Vikas Arora } 15833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora#endif // WEBP_USE_MIPS32 1597c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_MIPS_DSP_R2) 1607c8da7ce66017295a65ec028084b90800be377f8James Zern if (VP8GetCPUInfo(kMIPSdspR2)) { 1617c8da7ce66017295a65ec028084b90800be377f8James Zern WebPInitSamplersMIPSdspR2(); 1627c8da7ce66017295a65ec028084b90800be377f8James Zern } 1637c8da7ce66017295a65ec028084b90800be377f8James Zern#endif // WEBP_USE_MIPS_DSP_R2 
1648b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora } 1659e80ee991168a0a6c2a906dd2c17c5e17df4566eJames Zern yuv_last_cpuinfo_used = VP8GetCPUInfo; 1668b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora} 1678b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora 16833f74dabbc7920a65ed435d7417987589febdc16Vikas Arora//----------------------------------------------------------------------------- 1697c8da7ce66017295a65ec028084b90800be377f8James Zern// ARGB -> YUV converters 1707c8da7ce66017295a65ec028084b90800be377f8James Zern 1717c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) { 1727c8da7ce66017295a65ec028084b90800be377f8James Zern int i; 1737c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < width; ++i) { 1747c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t p = argb[i]; 1757c8da7ce66017295a65ec028084b90800be377f8James Zern y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff, 1767c8da7ce66017295a65ec028084b90800be377f8James Zern YUV_HALF); 1777c8da7ce66017295a65ec028084b90800be377f8James Zern } 1787c8da7ce66017295a65ec028084b90800be377f8James Zern} 1797c8da7ce66017295a65ec028084b90800be377f8James Zern 1807c8da7ce66017295a65ec028084b90800be377f8James Zernvoid WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, 1817c8da7ce66017295a65ec028084b90800be377f8James Zern int src_width, int do_store) { 1827c8da7ce66017295a65ec028084b90800be377f8James Zern // No rounding. Last pixel is dealt with separately. 
1837c8da7ce66017295a65ec028084b90800be377f8James Zern const int uv_width = src_width >> 1; 1847c8da7ce66017295a65ec028084b90800be377f8James Zern int i; 1857c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < uv_width; ++i) { 1867c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t v0 = argb[2 * i + 0]; 1877c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t v1 = argb[2 * i + 1]; 1887c8da7ce66017295a65ec028084b90800be377f8James Zern // VP8RGBToU/V expects four accumulated pixels. Hence we need to 1897c8da7ce66017295a65ec028084b90800be377f8James Zern // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less. 1907c8da7ce66017295a65ec028084b90800be377f8James Zern const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe); 1917c8da7ce66017295a65ec028084b90800be377f8James Zern const int g = ((v0 >> 7) & 0x1fe) + ((v1 >> 7) & 0x1fe); 1927c8da7ce66017295a65ec028084b90800be377f8James Zern const int b = ((v0 << 1) & 0x1fe) + ((v1 << 1) & 0x1fe); 1937c8da7ce66017295a65ec028084b90800be377f8James Zern const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); 1947c8da7ce66017295a65ec028084b90800be377f8James Zern const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); 1957c8da7ce66017295a65ec028084b90800be377f8James Zern if (do_store) { 1967c8da7ce66017295a65ec028084b90800be377f8James Zern u[i] = tmp_u; 1977c8da7ce66017295a65ec028084b90800be377f8James Zern v[i] = tmp_v; 1987c8da7ce66017295a65ec028084b90800be377f8James Zern } else { 1997c8da7ce66017295a65ec028084b90800be377f8James Zern // Approximated average-of-four. But it's an acceptable diff. 
2007c8da7ce66017295a65ec028084b90800be377f8James Zern u[i] = (u[i] + tmp_u + 1) >> 1; 2017c8da7ce66017295a65ec028084b90800be377f8James Zern v[i] = (v[i] + tmp_v + 1) >> 1; 2027c8da7ce66017295a65ec028084b90800be377f8James Zern } 2037c8da7ce66017295a65ec028084b90800be377f8James Zern } 2047c8da7ce66017295a65ec028084b90800be377f8James Zern if (src_width & 1) { // last pixel 2057c8da7ce66017295a65ec028084b90800be377f8James Zern const uint32_t v0 = argb[2 * i + 0]; 2067c8da7ce66017295a65ec028084b90800be377f8James Zern const int r = (v0 >> 14) & 0x3fc; 2077c8da7ce66017295a65ec028084b90800be377f8James Zern const int g = (v0 >> 6) & 0x3fc; 2087c8da7ce66017295a65ec028084b90800be377f8James Zern const int b = (v0 << 2) & 0x3fc; 2097c8da7ce66017295a65ec028084b90800be377f8James Zern const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2); 2107c8da7ce66017295a65ec028084b90800be377f8James Zern const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2); 2117c8da7ce66017295a65ec028084b90800be377f8James Zern if (do_store) { 2127c8da7ce66017295a65ec028084b90800be377f8James Zern u[i] = tmp_u; 2137c8da7ce66017295a65ec028084b90800be377f8James Zern v[i] = tmp_v; 2147c8da7ce66017295a65ec028084b90800be377f8James Zern } else { 2157c8da7ce66017295a65ec028084b90800be377f8James Zern u[i] = (u[i] + tmp_u + 1) >> 1; 2167c8da7ce66017295a65ec028084b90800be377f8James Zern v[i] = (v[i] + tmp_v + 1) >> 1; 2177c8da7ce66017295a65ec028084b90800be377f8James Zern } 2187c8da7ce66017295a65ec028084b90800be377f8James Zern } 2197c8da7ce66017295a65ec028084b90800be377f8James Zern} 2207c8da7ce66017295a65ec028084b90800be377f8James Zern 2217c8da7ce66017295a65ec028084b90800be377f8James Zern//----------------------------------------------------------------------------- 2227c8da7ce66017295a65ec028084b90800be377f8James Zern 2237c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) { 2247c8da7ce66017295a65ec028084b90800be377f8James Zern int i; 
2257c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < width; ++i, rgb += 3) { 2267c8da7ce66017295a65ec028084b90800be377f8James Zern y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF); 2277c8da7ce66017295a65ec028084b90800be377f8James Zern } 2287c8da7ce66017295a65ec028084b90800be377f8James Zern} 2297c8da7ce66017295a65ec028084b90800be377f8James Zern 2307c8da7ce66017295a65ec028084b90800be377f8James Zernstatic void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) { 2317c8da7ce66017295a65ec028084b90800be377f8James Zern int i; 2327c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < width; ++i, bgr += 3) { 2337c8da7ce66017295a65ec028084b90800be377f8James Zern y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF); 2347c8da7ce66017295a65ec028084b90800be377f8James Zern } 2357c8da7ce66017295a65ec028084b90800be377f8James Zern} 2367c8da7ce66017295a65ec028084b90800be377f8James Zern 2377c8da7ce66017295a65ec028084b90800be377f8James Zernvoid WebPConvertRGBA32ToUV_C(const uint16_t* rgb, 2387c8da7ce66017295a65ec028084b90800be377f8James Zern uint8_t* u, uint8_t* v, int width) { 2397c8da7ce66017295a65ec028084b90800be377f8James Zern int i; 2407c8da7ce66017295a65ec028084b90800be377f8James Zern for (i = 0; i < width; i += 1, rgb += 4) { 2417c8da7ce66017295a65ec028084b90800be377f8James Zern const int r = rgb[0], g = rgb[1], b = rgb[2]; 2427c8da7ce66017295a65ec028084b90800be377f8James Zern u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2); 2437c8da7ce66017295a65ec028084b90800be377f8James Zern v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2); 2447c8da7ce66017295a65ec028084b90800be377f8James Zern } 2457c8da7ce66017295a65ec028084b90800be377f8James Zern} 2467c8da7ce66017295a65ec028084b90800be377f8James Zern 2477c8da7ce66017295a65ec028084b90800be377f8James Zern//----------------------------------------------------------------------------- 2487c8da7ce66017295a65ec028084b90800be377f8James Zern 249fa39824bb690c5806358871f46940d0450973d8aJames Zern#define MAX_Y ((1 << 10) - 1) // 
10b precision over 16b-arithmetic 250fa39824bb690c5806358871f46940d0450973d8aJames Zernstatic uint16_t clip_y(int v) { 251fa39824bb690c5806358871f46940d0450973d8aJames Zern return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; 252fa39824bb690c5806358871f46940d0450973d8aJames Zern} 253fa39824bb690c5806358871f46940d0450973d8aJames Zern 254fa39824bb690c5806358871f46940d0450973d8aJames Zernstatic uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src, 255fa39824bb690c5806358871f46940d0450973d8aJames Zern uint16_t* dst, int len) { 256fa39824bb690c5806358871f46940d0450973d8aJames Zern uint64_t diff = 0; 257fa39824bb690c5806358871f46940d0450973d8aJames Zern int i; 258fa39824bb690c5806358871f46940d0450973d8aJames Zern for (i = 0; i < len; ++i) { 259fa39824bb690c5806358871f46940d0450973d8aJames Zern const int diff_y = ref[i] - src[i]; 260fa39824bb690c5806358871f46940d0450973d8aJames Zern const int new_y = (int)dst[i] + diff_y; 261fa39824bb690c5806358871f46940d0450973d8aJames Zern dst[i] = clip_y(new_y); 262fa39824bb690c5806358871f46940d0450973d8aJames Zern diff += (uint64_t)abs(diff_y); 263fa39824bb690c5806358871f46940d0450973d8aJames Zern } 264fa39824bb690c5806358871f46940d0450973d8aJames Zern return diff; 265fa39824bb690c5806358871f46940d0450973d8aJames Zern} 266fa39824bb690c5806358871f46940d0450973d8aJames Zern 267fa39824bb690c5806358871f46940d0450973d8aJames Zernstatic void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src, 268fa39824bb690c5806358871f46940d0450973d8aJames Zern int16_t* dst, int len) { 269fa39824bb690c5806358871f46940d0450973d8aJames Zern int i; 270fa39824bb690c5806358871f46940d0450973d8aJames Zern for (i = 0; i < len; ++i) { 271fa39824bb690c5806358871f46940d0450973d8aJames Zern const int diff_uv = ref[i] - src[i]; 272fa39824bb690c5806358871f46940d0450973d8aJames Zern dst[i] += diff_uv; 273fa39824bb690c5806358871f46940d0450973d8aJames Zern } 274fa39824bb690c5806358871f46940d0450973d8aJames Zern} 
275fa39824bb690c5806358871f46940d0450973d8aJames Zern 276fa39824bb690c5806358871f46940d0450973d8aJames Zernstatic void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len, 277fa39824bb690c5806358871f46940d0450973d8aJames Zern const uint16_t* best_y, uint16_t* out) { 278fa39824bb690c5806358871f46940d0450973d8aJames Zern int i; 279fa39824bb690c5806358871f46940d0450973d8aJames Zern for (i = 0; i < len; ++i, ++A, ++B) { 280fa39824bb690c5806358871f46940d0450973d8aJames Zern const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; 281fa39824bb690c5806358871f46940d0450973d8aJames Zern const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; 282fa39824bb690c5806358871f46940d0450973d8aJames Zern out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); 283fa39824bb690c5806358871f46940d0450973d8aJames Zern out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); 284fa39824bb690c5806358871f46940d0450973d8aJames Zern } 285fa39824bb690c5806358871f46940d0450973d8aJames Zern} 286fa39824bb690c5806358871f46940d0450973d8aJames Zern 287fa39824bb690c5806358871f46940d0450973d8aJames Zern#undef MAX_Y 288fa39824bb690c5806358871f46940d0450973d8aJames Zern 289fa39824bb690c5806358871f46940d0450973d8aJames Zern//----------------------------------------------------------------------------- 290fa39824bb690c5806358871f46940d0450973d8aJames Zern 2917c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width); 2927c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width); 2937c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, 2947c8da7ce66017295a65ec028084b90800be377f8James Zern uint8_t* u, uint8_t* v, int width); 2957c8da7ce66017295a65ec028084b90800be377f8James Zern 2967c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width); 
2977c8da7ce66017295a65ec028084b90800be377f8James Zernvoid (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, 2987c8da7ce66017295a65ec028084b90800be377f8James Zern int src_width, int do_store); 2997c8da7ce66017295a65ec028084b90800be377f8James Zern 300fa39824bb690c5806358871f46940d0450973d8aJames Zernuint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src, 301fa39824bb690c5806358871f46940d0450973d8aJames Zern uint16_t* dst, int len); 302fa39824bb690c5806358871f46940d0450973d8aJames Zernvoid (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src, 303fa39824bb690c5806358871f46940d0450973d8aJames Zern int16_t* dst, int len); 304fa39824bb690c5806358871f46940d0450973d8aJames Zernvoid (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len, 305fa39824bb690c5806358871f46940d0450973d8aJames Zern const uint16_t* best_y, uint16_t* out); 306fa39824bb690c5806358871f46940d0450973d8aJames Zern 3077c8da7ce66017295a65ec028084b90800be377f8James Zernstatic volatile VP8CPUInfo rgba_to_yuv_last_cpuinfo_used = 3087c8da7ce66017295a65ec028084b90800be377f8James Zern (VP8CPUInfo)&rgba_to_yuv_last_cpuinfo_used; 3097c8da7ce66017295a65ec028084b90800be377f8James Zern 3107c8da7ce66017295a65ec028084b90800be377f8James Zernextern void WebPInitConvertARGBToYUVSSE2(void); 311fa39824bb690c5806358871f46940d0450973d8aJames Zernextern void WebPInitSharpYUVSSE2(void); 3127c8da7ce66017295a65ec028084b90800be377f8James Zern 3137c8da7ce66017295a65ec028084b90800be377f8James ZernWEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) { 3147c8da7ce66017295a65ec028084b90800be377f8James Zern if (rgba_to_yuv_last_cpuinfo_used == VP8GetCPUInfo) return; 3157c8da7ce66017295a65ec028084b90800be377f8James Zern 3167c8da7ce66017295a65ec028084b90800be377f8James Zern WebPConvertARGBToY = ConvertARGBToY; 3177c8da7ce66017295a65ec028084b90800be377f8James Zern WebPConvertARGBToUV = WebPConvertARGBToUV_C; 3187c8da7ce66017295a65ec028084b90800be377f8James Zern 
3197c8da7ce66017295a65ec028084b90800be377f8James Zern WebPConvertRGB24ToY = ConvertRGB24ToY; 3207c8da7ce66017295a65ec028084b90800be377f8James Zern WebPConvertBGR24ToY = ConvertBGR24ToY; 3217c8da7ce66017295a65ec028084b90800be377f8James Zern 3227c8da7ce66017295a65ec028084b90800be377f8James Zern WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; 3237c8da7ce66017295a65ec028084b90800be377f8James Zern 324fa39824bb690c5806358871f46940d0450973d8aJames Zern WebPSharpYUVUpdateY = SharpYUVUpdateY_C; 325fa39824bb690c5806358871f46940d0450973d8aJames Zern WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C; 326fa39824bb690c5806358871f46940d0450973d8aJames Zern WebPSharpYUVFilterRow = SharpYUVFilterRow_C; 327fa39824bb690c5806358871f46940d0450973d8aJames Zern 3287c8da7ce66017295a65ec028084b90800be377f8James Zern if (VP8GetCPUInfo != NULL) { 3297c8da7ce66017295a65ec028084b90800be377f8James Zern#if defined(WEBP_USE_SSE2) 3307c8da7ce66017295a65ec028084b90800be377f8James Zern if (VP8GetCPUInfo(kSSE2)) { 3317c8da7ce66017295a65ec028084b90800be377f8James Zern WebPInitConvertARGBToYUVSSE2(); 332fa39824bb690c5806358871f46940d0450973d8aJames Zern WebPInitSharpYUVSSE2(); 3337c8da7ce66017295a65ec028084b90800be377f8James Zern } 3347c8da7ce66017295a65ec028084b90800be377f8James Zern#endif // WEBP_USE_SSE2 3357c8da7ce66017295a65ec028084b90800be377f8James Zern } 3367c8da7ce66017295a65ec028084b90800be377f8James Zern rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo; 3377c8da7ce66017295a65ec028084b90800be377f8James Zern} 338