row_common.cc revision 7ce0a1d1337c01056ba24006efab21f00e179e04
1/* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "libyuv/row.h" 12 13#include <string.h> // For memcpy and memset. 14 15#include "libyuv/basic_types.h" 16 17#ifdef __cplusplus 18namespace libyuv { 19extern "C" { 20#endif 21 22// llvm x86 is poor at ternary operator, so use branchless min/max. 23 24#define USE_BRANCHLESS 1 25#if USE_BRANCHLESS 26static __inline int32 clamp0(int32 v) { 27 return ((-(v) >> 31) & (v)); 28} 29 30static __inline int32 clamp255(int32 v) { 31 return (((255 - (v)) >> 31) | (v)) & 255; 32} 33 34static __inline uint32 Clamp(int32 val) { 35 int v = clamp0(val); 36 return (uint32)(clamp255(v)); 37} 38 39static __inline uint32 Abs(int32 v) { 40 int m = v >> 31; 41 return (v + m) ^ m; 42} 43#else // USE_BRANCHLESS 44static __inline int32 clamp0(int32 v) { 45 return (v < 0) ? 0 : v; 46} 47 48static __inline int32 clamp255(int32 v) { 49 return (v > 255) ? 255 : v; 50} 51 52static __inline uint32 Clamp(int32 val) { 53 int v = clamp0(val); 54 return (uint32)(clamp255(v)); 55} 56 57static __inline uint32 Abs(int32 v) { 58 return (v < 0) ? -v : v; 59} 60#endif // USE_BRANCHLESS 61 62#ifdef LIBYUV_LITTLE_ENDIAN 63#define WRITEWORD(p, v) *(uint32*)(p) = v 64#else 65static inline void WRITEWORD(uint8* p, uint32 v) { 66 p[0] = (uint8)(v & 255); 67 p[1] = (uint8)((v >> 8) & 255); 68 p[2] = (uint8)((v >> 16) & 255); 69 p[3] = (uint8)((v >> 24) & 255); 70} 71#endif 72 73void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { 74 int x; 75 for (x = 0; x < width; ++x) { 76 uint8 b = src_rgb24[0]; 77 uint8 g = src_rgb24[1]; 78 uint8 r = src_rgb24[2]; 79 dst_argb[0] = b; 80 dst_argb[1] = g; 81 dst_argb[2] = r; 82 dst_argb[3] = 255u; 83 dst_argb += 4; 84 src_rgb24 += 3; 85 } 86} 87 88void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { 89 int x; 90 for (x = 0; x < width; ++x) { 91 uint8 r = src_raw[0]; 92 uint8 g = src_raw[1]; 93 uint8 b = src_raw[2]; 94 dst_argb[0] = b; 95 dst_argb[1] = g; 96 dst_argb[2] = r; 97 dst_argb[3] = 255u; 98 dst_argb += 4; 99 src_raw += 3; 100 } 101} 102 103void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { 104 int x; 105 for (x = 0; x < width; ++x) { 106 uint8 b = src_rgb565[0] & 0x1f; 107 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 108 uint8 r = src_rgb565[1] >> 3; 109 dst_argb[0] = (b << 3) | (b >> 2); 110 dst_argb[1] = (g << 2) | (g >> 4); 111 dst_argb[2] = (r << 3) | (r >> 2); 112 dst_argb[3] = 255u; 113 dst_argb += 4; 114 src_rgb565 += 2; 115 } 116} 117 118void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, 119 int width) { 120 int x; 121 for (x = 0; x < width; ++x) { 122 uint8 b = src_argb1555[0] & 0x1f; 123 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 124 uint8 r = (src_argb1555[1] & 0x7c) >> 2; 125 uint8 a = src_argb1555[1] >> 7; 126 dst_argb[0] = (b << 3) | (b >> 2); 127 dst_argb[1] = (g << 3) | (g >> 2); 128 dst_argb[2] = (r << 3) | (r >> 2); 129 dst_argb[3] = -a; 130 dst_argb += 4; 131 src_argb1555 += 2; 132 } 133} 134 135void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, 136 int width) { 137 int x; 138 for (x = 0; x < width; ++x) { 139 uint8 b = src_argb4444[0] & 0x0f; 140 uint8 g = src_argb4444[0] >> 4; 141 uint8 r = src_argb4444[1] & 0x0f; 142 uint8 a = src_argb4444[1] >> 4; 143 dst_argb[0] = (b << 4) | b; 144 dst_argb[1] = (g << 4) | g; 145 dst_argb[2] = (r << 4) | r; 146 dst_argb[3] = (a << 4) | a; 147 dst_argb += 4; 148 src_argb4444 += 2; 149 } 150} 151 152void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 153 int x; 154 for (x = 0; x < width; ++x) { 155 uint8 b = src_argb[0]; 156 uint8 g = src_argb[1]; 157 uint8 r = src_argb[2]; 158 dst_rgb[0] = b; 159 dst_rgb[1] = g; 160 dst_rgb[2] = r; 161 dst_rgb += 3; 162 src_argb += 4; 163 } 164} 165 166void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { 167 int x; 168 for (x = 0; x < width; ++x) { 169 uint8 b = src_argb[0]; 170 uint8 g = src_argb[1]; 171 uint8 r = src_argb[2]; 172 dst_rgb[0] = r; 173 dst_rgb[1] = g; 174 dst_rgb[2] = b; 175 dst_rgb += 3; 176 src_argb += 4; 177 } 178} 179 180void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 181 int x; 182 for (x = 0; x < width - 1; x += 2) { 183 uint8 b0 = src_argb[0] >> 3; 184 uint8 g0 = src_argb[1] >> 2; 185 uint8 r0 = src_argb[2] >> 3; 186 uint8 b1 = src_argb[4] >> 3; 187 uint8 g1 = src_argb[5] >> 2; 188 uint8 r1 = src_argb[6] >> 3; 189 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | 190 (b1 << 16) | (g1 << 21) | (r1 << 27)); 191 dst_rgb += 4; 192 src_argb += 8; 193 } 194 if (width & 1) { 195 uint8 b0 = src_argb[0] >> 3; 196 uint8 g0 = src_argb[1] >> 2; 197 uint8 r0 = src_argb[2] >> 3; 198 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 199 } 200} 201 202// dither4 is a row of 4 values from 4x4 dither matrix. 203// The 4x4 matrix contains values to increase RGB. When converting to 204// fewer bits (565) this provides an ordered dither. 205// The order in the 4x4 matrix in first byte is upper left. 206// The 4 values are passed as an int, then referenced as an array, so 207// endian will not affect order of the original matrix. But the dither4 208// will containing the first pixel in the lower byte for little endian 209// or the upper byte for big endian. 210void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, 211 const uint32 dither4, int width) { 212 int x; 213 for (x = 0; x < width - 1; x += 2) { 214 int dither0 = ((const unsigned char*)(&dither4))[x & 3]; 215 int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; 216 uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; 217 uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; 218 uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; 219 uint8 b1 = clamp255(src_argb[4] + dither1) >> 3; 220 uint8 g1 = clamp255(src_argb[5] + dither1) >> 2; 221 uint8 r1 = clamp255(src_argb[6] + dither1) >> 3; 222 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | 223 (b1 << 16) | (g1 << 21) | (r1 << 27)); 224 dst_rgb += 4; 225 src_argb += 8; 226 } 227 if (width & 1) { 228 int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; 229 uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; 230 uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; 231 uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; 232 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 233 } 234} 235 236void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 237 int x; 238 for (x = 0; x < width - 1; x += 2) { 239 uint8 b0 = src_argb[0] >> 3; 240 uint8 g0 = src_argb[1] >> 3; 241 uint8 r0 = src_argb[2] >> 3; 242 uint8 a0 = src_argb[3] >> 7; 243 uint8 b1 = src_argb[4] >> 3; 244 uint8 g1 = src_argb[5] >> 3; 245 uint8 r1 = src_argb[6] >> 3; 246 uint8 a1 = src_argb[7] >> 7; 247 *(uint32*)(dst_rgb) = 248 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | 249 (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); 250 dst_rgb += 4; 251 src_argb += 8; 252 } 253 if (width & 1) { 254 uint8 b0 = src_argb[0] >> 3; 255 uint8 g0 = src_argb[1] >> 3; 256 uint8 r0 = src_argb[2] >> 3; 257 uint8 a0 = src_argb[3] >> 7; 258 *(uint16*)(dst_rgb) = 259 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); 260 } 261} 262 263void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 264 int x; 265 for (x = 0; x < width - 1; x += 2) { 266 uint8 b0 = src_argb[0] >> 4; 267 uint8 g0 = src_argb[1] >> 4; 268 uint8 r0 = src_argb[2] >> 4; 269 uint8 a0 = src_argb[3] >> 4; 270 uint8 b1 = src_argb[4] >> 4; 271 uint8 g1 = src_argb[5] >> 4; 272 uint8 r1 = src_argb[6] >> 4; 273 uint8 a1 = src_argb[7] >> 4; 274 *(uint32*)(dst_rgb) = 275 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | 276 (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); 277 dst_rgb += 4; 278 src_argb += 8; 279 } 280 if (width & 1) { 281 uint8 b0 = src_argb[0] >> 4; 282 uint8 g0 = src_argb[1] >> 4; 283 uint8 r0 = src_argb[2] >> 4; 284 uint8 a0 = src_argb[3] >> 4; 285 *(uint16*)(dst_rgb) = 286 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); 287 } 288} 289 290static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { 291 return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; 292} 293 294static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { 295 return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; 296} 297static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { 298 return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; 299} 300 301#define MAKEROWY(NAME, R, G, B, BPP) \ 302void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 303 int x; \ 304 for (x = 0; x < width; ++x) { \ 305 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ 306 src_argb0 += BPP; \ 307 dst_y += 1; \ 308 } \ 309} \ 310void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 311 uint8* dst_u, uint8* dst_v, int width) { \ 312 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 313 int x; \ 314 for (x = 0; x < width - 1; x += 2) { \ 315 uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \ 316 src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \ 317 uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \ 318 src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \ 319 uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \ 320 src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \ 321 dst_u[0] = RGBToU(ar, ag, ab); \ 322 dst_v[0] = RGBToV(ar, ag, ab); \ 323 src_rgb0 += BPP * 2; \ 324 src_rgb1 += BPP * 2; \ 325 dst_u += 1; \ 326 dst_v += 1; \ 327 } \ 328 if (width & 1) { \ 329 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ 330 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ 331 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ 332 dst_u[0] = RGBToU(ar, ag, ab); \ 333 dst_v[0] = RGBToV(ar, ag, ab); \ 334 } \ 335} 336 337MAKEROWY(ARGB, 2, 1, 0, 4) 338MAKEROWY(BGRA, 1, 2, 3, 4) 339MAKEROWY(ABGR, 0, 1, 2, 4) 340MAKEROWY(RGBA, 3, 2, 1, 4) 341MAKEROWY(RGB24, 2, 1, 0, 3) 342MAKEROWY(RAW, 0, 1, 2, 3) 343#undef MAKEROWY 344 345// JPeg uses a variation on BT.601-1 full range 346// y = 0.29900 * r + 0.58700 * g + 0.11400 * b 347// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center 348// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center 349// BT.601 Mpeg range uses: 350// b 0.1016 * 255 = 25.908 = 25 351// g 0.5078 * 255 = 129.489 = 129 352// r 0.2578 * 255 = 65.739 = 66 353// JPeg 8 bit Y (not used): 354// b 0.11400 * 256 = 29.184 = 29 355// g 0.58700 * 256 = 150.272 = 150 356// r 0.29900 * 256 = 76.544 = 77 357// JPeg 7 bit Y: 358// b 0.11400 * 128 = 14.592 = 15 359// g 0.58700 * 128 = 75.136 = 75 360// r 0.29900 * 128 = 38.272 = 38 361// JPeg 8 bit U: 362// b 0.50000 * 255 = 127.5 = 127 363// g -0.33126 * 255 = -84.4713 = -84 364// r -0.16874 * 255 = -43.0287 = -43 365// JPeg 8 bit V: 366// b -0.08131 * 255 = -20.73405 = -20 367// g -0.41869 * 255 = -106.76595 = -107 368// r 0.50000 * 255 = 127.5 = 127 369 370static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) { 371 return (38 * r + 75 * g + 15 * b + 64) >> 7; 372} 373 374static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) { 375 return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; 376} 377static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) { 378 return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; 379} 380 381#define AVGB(a, b) (((a) + (b) + 1) >> 1) 382 383#define MAKEROWYJ(NAME, R, G, B, BPP) \ 384void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 385 int x; \ 386 for (x = 0; x < width; ++x) { \ 387 dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ 388 src_argb0 += BPP; \ 389 dst_y += 1; \ 390 } \ 391} \ 392void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 393 uint8* dst_u, uint8* dst_v, int width) { \ 394 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 395 int x; \ 396 for (x = 0; x < width - 1; x += 2) { \ 397 uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ 398 AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ 399 uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ 400 AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ 401 uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ 402 AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ 403 dst_u[0] = RGBToUJ(ar, ag, ab); \ 404 dst_v[0] = RGBToVJ(ar, ag, ab); \ 405 src_rgb0 += BPP * 2; \ 406 src_rgb1 += BPP * 2; \ 407 dst_u += 1; \ 408 dst_v += 1; \ 409 } \ 410 if (width & 1) { \ 411 uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \ 412 uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \ 413 uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \ 414 dst_u[0] = RGBToUJ(ar, ag, ab); \ 415 dst_v[0] = RGBToVJ(ar, ag, ab); \ 416 } \ 417} 418 419MAKEROWYJ(ARGB, 2, 1, 0, 4) 420#undef MAKEROWYJ 421 422void ARGBToUVJ422Row_C(const uint8* src_argb, 423 uint8* dst_u, uint8* dst_v, int width) { 424 int x; 425 for (x = 0; x < width - 1; x += 2) { 426 uint8 ab = (src_argb[0] + src_argb[4]) >> 1; 427 uint8 ag = (src_argb[1] + src_argb[5]) >> 1; 428 uint8 ar = (src_argb[2] + src_argb[6]) >> 1; 429 dst_u[0] = RGBToUJ(ar, ag, ab); 430 dst_v[0] = RGBToVJ(ar, ag, ab); 431 src_argb += 8; 432 dst_u += 1; 433 dst_v += 1; 434 } 435 if (width & 1) { 436 uint8 ab = src_argb[0]; 437 uint8 ag = src_argb[1]; 438 uint8 ar = src_argb[2]; 439 dst_u[0] = RGBToUJ(ar, ag, ab); 440 dst_v[0] = RGBToVJ(ar, ag, ab); 441 } 442} 443 444void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { 445 int x; 446 for (x = 0; x < width; ++x) { 447 uint8 b = src_rgb565[0] & 0x1f; 448 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 449 uint8 r = src_rgb565[1] >> 3; 450 b = (b << 3) | (b >> 2); 451 g = (g << 2) | (g >> 4); 452 r = (r << 3) | (r >> 2); 453 dst_y[0] = RGBToY(r, g, b); 454 src_rgb565 += 2; 455 dst_y += 1; 456 } 457} 458 459void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { 460 int x; 461 for (x = 0; x < width; ++x) { 462 uint8 b = src_argb1555[0] & 0x1f; 463 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 464 uint8 r = (src_argb1555[1] & 0x7c) >> 2; 465 b = (b << 3) | (b >> 2); 466 g = (g << 3) | (g >> 2); 467 r = (r << 3) | (r >> 2); 468 dst_y[0] = RGBToY(r, g, b); 469 src_argb1555 += 2; 470 dst_y += 1; 471 } 472} 473 474void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { 475 int x; 476 for (x = 0; x < width; ++x) { 477 uint8 b = src_argb4444[0] & 0x0f; 478 uint8 g = src_argb4444[0] >> 4; 479 uint8 r = src_argb4444[1] & 0x0f; 480 b = (b << 4) | b; 481 g = (g << 4) | g; 482 r = (r << 4) | r; 483 dst_y[0] = RGBToY(r, g, b); 484 src_argb4444 += 2; 485 dst_y += 1; 486 } 487} 488 489void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, 490 uint8* dst_u, uint8* dst_v, int width) { 491 const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565; 492 int x; 493 for (x = 0; x < width - 1; x += 2) { 494 uint8 b0 = src_rgb565[0] & 0x1f; 495 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 496 uint8 r0 = src_rgb565[1] >> 3; 497 uint8 b1 = src_rgb565[2] & 0x1f; 498 uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); 499 uint8 r1 = src_rgb565[3] >> 3; 500 uint8 b2 = next_rgb565[0] & 0x1f; 501 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 502 uint8 r2 = next_rgb565[1] >> 3; 503 uint8 b3 = next_rgb565[2] & 0x1f; 504 uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); 505 uint8 r3 = next_rgb565[3] >> 3; 506 uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. 507 uint8 g = (g0 + g1 + g2 + g3); 508 uint8 r = (r0 + r1 + r2 + r3); 509 b = (b << 1) | (b >> 6); // 787 -> 888. 510 r = (r << 1) | (r >> 6); 511 dst_u[0] = RGBToU(r, g, b); 512 dst_v[0] = RGBToV(r, g, b); 513 src_rgb565 += 4; 514 next_rgb565 += 4; 515 dst_u += 1; 516 dst_v += 1; 517 } 518 if (width & 1) { 519 uint8 b0 = src_rgb565[0] & 0x1f; 520 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 521 uint8 r0 = src_rgb565[1] >> 3; 522 uint8 b2 = next_rgb565[0] & 0x1f; 523 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 524 uint8 r2 = next_rgb565[1] >> 3; 525 uint8 b = (b0 + b2); // 565 * 2 = 676. 526 uint8 g = (g0 + g2); 527 uint8 r = (r0 + r2); 528 b = (b << 2) | (b >> 4); // 676 -> 888 529 g = (g << 1) | (g >> 6); 530 r = (r << 2) | (r >> 4); 531 dst_u[0] = RGBToU(r, g, b); 532 dst_v[0] = RGBToV(r, g, b); 533 } 534} 535 536void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, 537 uint8* dst_u, uint8* dst_v, int width) { 538 const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555; 539 int x; 540 for (x = 0; x < width - 1; x += 2) { 541 uint8 b0 = src_argb1555[0] & 0x1f; 542 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 543 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 544 uint8 b1 = src_argb1555[2] & 0x1f; 545 uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); 546 uint8 r1 = (src_argb1555[3] & 0x7c) >> 2; 547 uint8 b2 = next_argb1555[0] & 0x1f; 548 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 549 uint8 r2 = (next_argb1555[1] & 0x7c) >> 2; 550 uint8 b3 = next_argb1555[2] & 0x1f; 551 uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); 552 uint8 r3 = (next_argb1555[3] & 0x7c) >> 2; 553 uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. 554 uint8 g = (g0 + g1 + g2 + g3); 555 uint8 r = (r0 + r1 + r2 + r3); 556 b = (b << 1) | (b >> 6); // 777 -> 888. 557 g = (g << 1) | (g >> 6); 558 r = (r << 1) | (r >> 6); 559 dst_u[0] = RGBToU(r, g, b); 560 dst_v[0] = RGBToV(r, g, b); 561 src_argb1555 += 4; 562 next_argb1555 += 4; 563 dst_u += 1; 564 dst_v += 1; 565 } 566 if (width & 1) { 567 uint8 b0 = src_argb1555[0] & 0x1f; 568 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 569 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 570 uint8 b2 = next_argb1555[0] & 0x1f; 571 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 572 uint8 r2 = next_argb1555[1] >> 3; 573 uint8 b = (b0 + b2); // 555 * 2 = 666. 574 uint8 g = (g0 + g2); 575 uint8 r = (r0 + r2); 576 b = (b << 2) | (b >> 4); // 666 -> 888. 577 g = (g << 2) | (g >> 4); 578 r = (r << 2) | (r >> 4); 579 dst_u[0] = RGBToU(r, g, b); 580 dst_v[0] = RGBToV(r, g, b); 581 } 582} 583 584void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, 585 uint8* dst_u, uint8* dst_v, int width) { 586 const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444; 587 int x; 588 for (x = 0; x < width - 1; x += 2) { 589 uint8 b0 = src_argb4444[0] & 0x0f; 590 uint8 g0 = src_argb4444[0] >> 4; 591 uint8 r0 = src_argb4444[1] & 0x0f; 592 uint8 b1 = src_argb4444[2] & 0x0f; 593 uint8 g1 = src_argb4444[2] >> 4; 594 uint8 r1 = src_argb4444[3] & 0x0f; 595 uint8 b2 = next_argb4444[0] & 0x0f; 596 uint8 g2 = next_argb4444[0] >> 4; 597 uint8 r2 = next_argb4444[1] & 0x0f; 598 uint8 b3 = next_argb4444[2] & 0x0f; 599 uint8 g3 = next_argb4444[2] >> 4; 600 uint8 r3 = next_argb4444[3] & 0x0f; 601 uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. 602 uint8 g = (g0 + g1 + g2 + g3); 603 uint8 r = (r0 + r1 + r2 + r3); 604 b = (b << 2) | (b >> 4); // 666 -> 888. 605 g = (g << 2) | (g >> 4); 606 r = (r << 2) | (r >> 4); 607 dst_u[0] = RGBToU(r, g, b); 608 dst_v[0] = RGBToV(r, g, b); 609 src_argb4444 += 4; 610 next_argb4444 += 4; 611 dst_u += 1; 612 dst_v += 1; 613 } 614 if (width & 1) { 615 uint8 b0 = src_argb4444[0] & 0x0f; 616 uint8 g0 = src_argb4444[0] >> 4; 617 uint8 r0 = src_argb4444[1] & 0x0f; 618 uint8 b2 = next_argb4444[0] & 0x0f; 619 uint8 g2 = next_argb4444[0] >> 4; 620 uint8 r2 = next_argb4444[1] & 0x0f; 621 uint8 b = (b0 + b2); // 444 * 2 = 555. 622 uint8 g = (g0 + g2); 623 uint8 r = (r0 + r2); 624 b = (b << 3) | (b >> 2); // 555 -> 888. 625 g = (g << 3) | (g >> 2); 626 r = (r << 3) | (r >> 2); 627 dst_u[0] = RGBToU(r, g, b); 628 dst_v[0] = RGBToV(r, g, b); 629 } 630} 631 632void ARGBToUV444Row_C(const uint8* src_argb, 633 uint8* dst_u, uint8* dst_v, int width) { 634 int x; 635 for (x = 0; x < width; ++x) { 636 uint8 ab = src_argb[0]; 637 uint8 ag = src_argb[1]; 638 uint8 ar = src_argb[2]; 639 dst_u[0] = RGBToU(ar, ag, ab); 640 dst_v[0] = RGBToV(ar, ag, ab); 641 src_argb += 4; 642 dst_u += 1; 643 dst_v += 1; 644 } 645} 646 647void ARGBToUV422Row_C(const uint8* src_argb, 648 uint8* dst_u, uint8* dst_v, int width) { 649 int x; 650 for (x = 0; x < width - 1; x += 2) { 651 uint8 ab = (src_argb[0] + src_argb[4]) >> 1; 652 uint8 ag = (src_argb[1] + src_argb[5]) >> 1; 653 uint8 ar = (src_argb[2] + src_argb[6]) >> 1; 654 dst_u[0] = RGBToU(ar, ag, ab); 655 dst_v[0] = RGBToV(ar, ag, ab); 656 src_argb += 8; 657 dst_u += 1; 658 dst_v += 1; 659 } 660 if (width & 1) { 661 uint8 ab = src_argb[0]; 662 uint8 ag = src_argb[1]; 663 uint8 ar = src_argb[2]; 664 dst_u[0] = RGBToU(ar, ag, ab); 665 dst_v[0] = RGBToV(ar, ag, ab); 666 } 667} 668 669void ARGBToUV411Row_C(const uint8* src_argb, 670 uint8* dst_u, uint8* dst_v, int width) { 671 int x; 672 for (x = 0; x < width - 3; x += 4) { 673 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; 674 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; 675 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; 676 dst_u[0] = RGBToU(ar, ag, ab); 677 dst_v[0] = RGBToV(ar, ag, ab); 678 src_argb += 16; 679 dst_u += 1; 680 dst_v += 1; 681 } 682 if ((width & 3) == 3) { 683 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3; 684 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3; 685 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3; 686 dst_u[0] = RGBToU(ar, ag, ab); 687 dst_v[0] = RGBToV(ar, ag, ab); 688 } else if ((width & 3) == 2) { 689 uint8 ab = (src_argb[0] + src_argb[4]) >> 1; 690 uint8 ag = (src_argb[1] + src_argb[5]) >> 1; 691 uint8 ar = (src_argb[2] + src_argb[6]) >> 1; 692 dst_u[0] = RGBToU(ar, ag, ab); 693 dst_v[0] = RGBToV(ar, ag, ab); 694 } else if ((width & 3) == 1) { 695 uint8 ab = src_argb[0]; 696 uint8 ag = src_argb[1]; 697 uint8 ar = src_argb[2]; 698 dst_u[0] = RGBToU(ar, ag, ab); 699 dst_v[0] = RGBToV(ar, ag, ab); 700 } 701} 702 703void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 704 int x; 705 for (x = 0; x < width; ++x) { 706 uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); 707 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 708 dst_argb[3] = src_argb[3]; 709 dst_argb += 4; 710 src_argb += 4; 711 } 712} 713 714// Convert a row of image to Sepia tone. 715void ARGBSepiaRow_C(uint8* dst_argb, int width) { 716 int x; 717 for (x = 0; x < width; ++x) { 718 int b = dst_argb[0]; 719 int g = dst_argb[1]; 720 int r = dst_argb[2]; 721 int sb = (b * 17 + g * 68 + r * 35) >> 7; 722 int sg = (b * 22 + g * 88 + r * 45) >> 7; 723 int sr = (b * 24 + g * 98 + r * 50) >> 7; 724 // b does not over flow. a is preserved from original. 725 dst_argb[0] = sb; 726 dst_argb[1] = clamp255(sg); 727 dst_argb[2] = clamp255(sr); 728 dst_argb += 4; 729 } 730} 731 732// Apply color matrix to a row of image. Matrix is signed. 733// TODO(fbarchard): Consider adding rounding (+32). 734void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, 735 const int8* matrix_argb, int width) { 736 int x; 737 for (x = 0; x < width; ++x) { 738 int b = src_argb[0]; 739 int g = src_argb[1]; 740 int r = src_argb[2]; 741 int a = src_argb[3]; 742 int sb = (b * matrix_argb[0] + g * matrix_argb[1] + 743 r * matrix_argb[2] + a * matrix_argb[3]) >> 6; 744 int sg = (b * matrix_argb[4] + g * matrix_argb[5] + 745 r * matrix_argb[6] + a * matrix_argb[7]) >> 6; 746 int sr = (b * matrix_argb[8] + g * matrix_argb[9] + 747 r * matrix_argb[10] + a * matrix_argb[11]) >> 6; 748 int sa = (b * matrix_argb[12] + g * matrix_argb[13] + 749 r * matrix_argb[14] + a * matrix_argb[15]) >> 6; 750 dst_argb[0] = Clamp(sb); 751 dst_argb[1] = Clamp(sg); 752 dst_argb[2] = Clamp(sr); 753 dst_argb[3] = Clamp(sa); 754 src_argb += 4; 755 dst_argb += 4; 756 } 757} 758 759// Apply color table to a row of image. 760void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 761 int x; 762 for (x = 0; x < width; ++x) { 763 int b = dst_argb[0]; 764 int g = dst_argb[1]; 765 int r = dst_argb[2]; 766 int a = dst_argb[3]; 767 dst_argb[0] = table_argb[b * 4 + 0]; 768 dst_argb[1] = table_argb[g * 4 + 1]; 769 dst_argb[2] = table_argb[r * 4 + 2]; 770 dst_argb[3] = table_argb[a * 4 + 3]; 771 dst_argb += 4; 772 } 773} 774 775// Apply color table to a row of image. 776void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 777 int x; 778 for (x = 0; x < width; ++x) { 779 int b = dst_argb[0]; 780 int g = dst_argb[1]; 781 int r = dst_argb[2]; 782 dst_argb[0] = table_argb[b * 4 + 0]; 783 dst_argb[1] = table_argb[g * 4 + 1]; 784 dst_argb[2] = table_argb[r * 4 + 2]; 785 dst_argb += 4; 786 } 787} 788 789void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, 790 int interval_offset, int width) { 791 int x; 792 for (x = 0; x < width; ++x) { 793 int b = dst_argb[0]; 794 int g = dst_argb[1]; 795 int r = dst_argb[2]; 796 dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 797 dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; 798 dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; 799 dst_argb += 4; 800 } 801} 802 803#define REPEAT8(v) (v) | ((v) << 8) 804#define SHADE(f, v) v * f >> 24 805 806void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, 807 uint32 value) { 808 const uint32 b_scale = REPEAT8(value & 0xff); 809 const uint32 g_scale = REPEAT8((value >> 8) & 0xff); 810 const uint32 r_scale = REPEAT8((value >> 16) & 0xff); 811 const uint32 a_scale = REPEAT8(value >> 24); 812 813 int i; 814 for (i = 0; i < width; ++i) { 815 const uint32 b = REPEAT8(src_argb[0]); 816 const uint32 g = REPEAT8(src_argb[1]); 817 const uint32 r = REPEAT8(src_argb[2]); 818 const uint32 a = REPEAT8(src_argb[3]); 819 dst_argb[0] = SHADE(b, b_scale); 820 dst_argb[1] = SHADE(g, g_scale); 821 dst_argb[2] = SHADE(r, r_scale); 822 dst_argb[3] = SHADE(a, a_scale); 823 src_argb += 4; 824 dst_argb += 4; 825 } 826} 827#undef REPEAT8 828#undef SHADE 829 830#define REPEAT8(v) (v) | ((v) << 8) 831#define SHADE(f, v) v * f >> 16 832 833void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1, 834 uint8* dst_argb, int width) { 835 int i; 836 for (i = 0; i < width; ++i) { 837 const uint32 b = REPEAT8(src_argb0[0]); 838 const uint32 g = REPEAT8(src_argb0[1]); 839 const uint32 r = REPEAT8(src_argb0[2]); 840 const uint32 a = REPEAT8(src_argb0[3]); 841 const uint32 b_scale = src_argb1[0]; 842 const uint32 g_scale = src_argb1[1]; 843 const uint32 r_scale = src_argb1[2]; 844 const uint32 a_scale = src_argb1[3]; 845 dst_argb[0] = SHADE(b, b_scale); 846 dst_argb[1] = SHADE(g, g_scale); 847 dst_argb[2] = SHADE(r, r_scale); 848 dst_argb[3] = SHADE(a, a_scale); 849 src_argb0 += 4; 850 src_argb1 += 4; 851 dst_argb += 4; 852 } 853} 854#undef REPEAT8 855#undef SHADE 856 857#define SHADE(f, v) clamp255(v + f) 858 859void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1, 860 uint8* dst_argb, int width) { 861 int i; 862 for (i = 0; i < width; ++i) { 863 const int b = src_argb0[0]; 864 const int g = src_argb0[1]; 865 const int r = src_argb0[2]; 866 const int a = src_argb0[3]; 867 const int b_add = src_argb1[0]; 868 const int g_add = src_argb1[1]; 869 const int r_add = src_argb1[2]; 870 const int a_add = src_argb1[3]; 871 dst_argb[0] = SHADE(b, b_add); 872 dst_argb[1] = SHADE(g, g_add); 873 dst_argb[2] = SHADE(r, r_add); 874 dst_argb[3] = SHADE(a, a_add); 875 src_argb0 += 4; 876 src_argb1 += 4; 877 dst_argb += 4; 878 } 879} 880#undef SHADE 881 882#define SHADE(f, v) clamp0(f - v) 883 884void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1, 885 uint8* dst_argb, int width) { 886 int i; 887 for (i = 0; i < width; ++i) { 888 const int b = src_argb0[0]; 889 const int g = src_argb0[1]; 890 const int r = src_argb0[2]; 891 const int a = src_argb0[3]; 892 const int b_sub = src_argb1[0]; 893 const int g_sub = src_argb1[1]; 894 const int r_sub = src_argb1[2]; 895 const int a_sub = src_argb1[3]; 896 dst_argb[0] = SHADE(b, b_sub); 897 dst_argb[1] = SHADE(g, g_sub); 898 dst_argb[2] = SHADE(r, r_sub); 899 dst_argb[3] = SHADE(a, a_sub); 900 src_argb0 += 4; 901 src_argb1 += 4; 902 dst_argb += 4; 903 } 904} 905#undef SHADE 906 907// Sobel functions which mimics SSSE3. 908void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, 909 uint8* dst_sobelx, int width) { 910 int i; 911 for (i = 0; i < width; ++i) { 912 int a = src_y0[i]; 913 int b = src_y1[i]; 914 int c = src_y2[i]; 915 int a_sub = src_y0[i + 2]; 916 int b_sub = src_y1[i + 2]; 917 int c_sub = src_y2[i + 2]; 918 int a_diff = a - a_sub; 919 int b_diff = b - b_sub; 920 int c_diff = c - c_sub; 921 int sobel = Abs(a_diff + b_diff * 2 + c_diff); 922 dst_sobelx[i] = (uint8)(clamp255(sobel)); 923 } 924} 925 926void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, 927 uint8* dst_sobely, int width) { 928 int i; 929 for (i = 0; i < width; ++i) { 930 int a = src_y0[i + 0]; 931 int b = src_y0[i + 1]; 932 int c = src_y0[i + 2]; 933 int a_sub = src_y1[i + 0]; 934 int b_sub = src_y1[i + 1]; 935 int c_sub = src_y1[i + 2]; 936 int a_diff = a - a_sub; 937 int b_diff = b - b_sub; 938 int c_diff = c - c_sub; 939 int sobel = Abs(a_diff + b_diff * 2 + c_diff); 940 dst_sobely[i] = (uint8)(clamp255(sobel)); 941 } 942} 943 944void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, 945 uint8* dst_argb, int width) { 946 int i; 947 for (i = 0; i < width; ++i) { 948 int r = src_sobelx[i]; 949 int b = src_sobely[i]; 950 int s = clamp255(r + b); 951 dst_argb[0] = (uint8)(s); 952 dst_argb[1] = (uint8)(s); 953 dst_argb[2] = (uint8)(s); 954 dst_argb[3] = (uint8)(255u); 955 dst_argb += 4; 956 } 957} 958 959void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, 960 uint8* dst_y, int width) { 961 int i; 962 for (i = 0; i < width; ++i) { 963 int r = src_sobelx[i]; 964 int b = src_sobely[i]; 965 int s = clamp255(r + b); 966 dst_y[i] = (uint8)(s); 967 } 968} 969 970void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, 971 uint8* dst_argb, int width) { 972 int i; 973 for (i = 0; i < width; ++i) { 974 int r = src_sobelx[i]; 975 int b = src_sobely[i]; 976 int g = clamp255(r + b); 977 dst_argb[0] = (uint8)(b); 978 dst_argb[1] = (uint8)(g); 979 dst_argb[2] = (uint8)(r); 980 dst_argb[3] = (uint8)(255u); 981 dst_argb += 4; 982 } 983} 984 985void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { 986 // Copy a Y to RGB. 987 int x; 988 for (x = 0; x < width; ++x) { 989 uint8 y = src_y[0]; 990 dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 991 dst_argb[3] = 255u; 992 dst_argb += 4; 993 ++src_y; 994 } 995} 996 997// BT.601 YUV to RGB reference 998// R = (Y - 16) * 1.164 - V * -1.596 999// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 1000// B = (Y - 16) * 1.164 - U * -2.018 1001 1002// Y contribution to R,G,B. Scale and bias. 1003// TODO(fbarchard): Consider moving constants into a common header. 1004#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ 1005#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ 1006 1007// U and V contributions to R,G,B. 1008#define UB -128 /* max(-128, round(-2.018 * 64)) */ 1009#define UG 25 /* round(0.391 * 64) */ 1010#define VG 52 /* round(0.813 * 64) */ 1011#define VR -102 /* round(-1.596 * 64) */ 1012 1013// Bias values to subtract 16 from Y and 128 from U and V. 1014#define BB (UB * 128 + YGB) 1015#define BG (UG * 128 + VG * 128 + YGB) 1016#define BR (VR * 128 + YGB) 1017 1018// C reference code that mimics the YUV assembly. 1019static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, 1020 uint8* b, uint8* g, uint8* r) { 1021 uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; 1022 *b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6); 1023 *g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6); 1024 *r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6); 1025} 1026 1027// C reference code that mimics the YUV assembly. 1028static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { 1029 uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; 1030 *b = Clamp((int32)(y1 + YGB) >> 6); 1031 *g = Clamp((int32)(y1 + YGB) >> 6); 1032 *r = Clamp((int32)(y1 + YGB) >> 6); 1033} 1034 1035#undef YG 1036#undef YGB 1037#undef UB 1038#undef UG 1039#undef VG 1040#undef VR 1041#undef BB 1042#undef BG 1043#undef BR 1044 1045// JPEG YUV to RGB reference 1046// * R = Y - V * -1.40200 1047// * G = Y - U * 0.34414 - V * 0.71414 1048// * B = Y - U * -1.77200 1049 1050// Y contribution to R,G,B. Scale and bias. 1051// TODO(fbarchard): Consider moving constants into a common header. 1052#define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ 1053#define YGBJ 32 /* 64 / 2 */ 1054 1055// U and V contributions to R,G,B. 1056#define UBJ -113 /* round(-1.77200 * 64) */ 1057#define UGJ 22 /* round(0.34414 * 64) */ 1058#define VGJ 46 /* round(0.71414 * 64) */ 1059#define VRJ -90 /* round(-1.40200 * 64) */ 1060 1061// Bias values to subtract 16 from Y and 128 from U and V. 1062#define BBJ (UBJ * 128 + YGBJ) 1063#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ) 1064#define BRJ (VRJ * 128 + YGBJ) 1065 1066// C reference code that mimics the YUV assembly. 1067static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v, 1068 uint8* b, uint8* g, uint8* r) { 1069 uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16; 1070 *b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6); 1071 *g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6); 1072 *r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6); 1073} 1074 1075#undef YGJ 1076#undef YGBJ 1077#undef UBJ 1078#undef UGJ 1079#undef VGJ 1080#undef VRJ 1081#undef BBJ 1082#undef BGJ 1083#undef BRJ 1084 1085#if !defined(LIBYUV_DISABLE_NEON) && \ 1086 (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON)) 1087// C mimic assembly. 1088// TODO(fbarchard): Remove subsampling from Neon. 1089void I444ToARGBRow_C(const uint8* src_y, 1090 const uint8* src_u, 1091 const uint8* src_v, 1092 uint8* rgb_buf, 1093 int width) { 1094 int x; 1095 for (x = 0; x < width - 1; x += 2) { 1096 uint8 u = (src_u[0] + src_u[1] + 1) >> 1; 1097 uint8 v = (src_v[0] + src_v[1] + 1) >> 1; 1098 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1099 rgb_buf[3] = 255; 1100 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1101 rgb_buf[7] = 255; 1102 src_y += 2; 1103 src_u += 2; 1104 src_v += 2; 1105 rgb_buf += 8; // Advance 2 pixels. 1106 } 1107 if (width & 1) { 1108 YuvPixel(src_y[0], src_u[0], src_v[0], 1109 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1110 } 1111} 1112#else 1113void I444ToARGBRow_C(const uint8* src_y, 1114 const uint8* src_u, 1115 const uint8* src_v, 1116 uint8* rgb_buf, 1117 int width) { 1118 int x; 1119 for (x = 0; x < width; ++x) { 1120 YuvPixel(src_y[0], src_u[0], src_v[0], 1121 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1122 rgb_buf[3] = 255; 1123 src_y += 1; 1124 src_u += 1; 1125 src_v += 1; 1126 rgb_buf += 4; // Advance 1 pixel. 1127 } 1128} 1129#endif 1130 1131// Also used for 420 1132void I422ToARGBRow_C(const uint8* src_y, 1133 const uint8* src_u, 1134 const uint8* src_v, 1135 uint8* rgb_buf, 1136 int width) { 1137 int x; 1138 for (x = 0; x < width - 1; x += 2) { 1139 YuvPixel(src_y[0], src_u[0], src_v[0], 1140 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1141 rgb_buf[3] = 255; 1142 YuvPixel(src_y[1], src_u[0], src_v[0], 1143 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1144 rgb_buf[7] = 255; 1145 src_y += 2; 1146 src_u += 1; 1147 src_v += 1; 1148 rgb_buf += 8; // Advance 2 pixels. 1149 } 1150 if (width & 1) { 1151 YuvPixel(src_y[0], src_u[0], src_v[0], 1152 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1153 rgb_buf[3] = 255; 1154 } 1155} 1156 1157void J422ToARGBRow_C(const uint8* src_y, 1158 const uint8* src_u, 1159 const uint8* src_v, 1160 uint8* rgb_buf, 1161 int width) { 1162 int x; 1163 for (x = 0; x < width - 1; x += 2) { 1164 YuvJPixel(src_y[0], src_u[0], src_v[0], 1165 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1166 rgb_buf[3] = 255; 1167 YuvJPixel(src_y[1], src_u[0], src_v[0], 1168 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1169 rgb_buf[7] = 255; 1170 src_y += 2; 1171 src_u += 1; 1172 src_v += 1; 1173 rgb_buf += 8; // Advance 2 pixels. 1174 } 1175 if (width & 1) { 1176 YuvJPixel(src_y[0], src_u[0], src_v[0], 1177 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1178 rgb_buf[3] = 255; 1179 } 1180} 1181 1182void I422ToRGB24Row_C(const uint8* src_y, 1183 const uint8* src_u, 1184 const uint8* src_v, 1185 uint8* rgb_buf, 1186 int width) { 1187 int x; 1188 for (x = 0; x < width - 1; x += 2) { 1189 YuvPixel(src_y[0], src_u[0], src_v[0], 1190 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1191 YuvPixel(src_y[1], src_u[0], src_v[0], 1192 rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); 1193 src_y += 2; 1194 src_u += 1; 1195 src_v += 1; 1196 rgb_buf += 6; // Advance 2 pixels. 1197 } 1198 if (width & 1) { 1199 YuvPixel(src_y[0], src_u[0], src_v[0], 1200 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1201 } 1202} 1203 1204void I422ToRAWRow_C(const uint8* src_y, 1205 const uint8* src_u, 1206 const uint8* src_v, 1207 uint8* rgb_buf, 1208 int width) { 1209 int x; 1210 for (x = 0; x < width - 1; x += 2) { 1211 YuvPixel(src_y[0], src_u[0], src_v[0], 1212 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1213 YuvPixel(src_y[1], src_u[0], src_v[0], 1214 rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); 1215 src_y += 2; 1216 src_u += 1; 1217 src_v += 1; 1218 rgb_buf += 6; // Advance 2 pixels. 1219 } 1220 if (width & 1) { 1221 YuvPixel(src_y[0], src_u[0], src_v[0], 1222 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1223 } 1224} 1225 1226void I422ToARGB4444Row_C(const uint8* src_y, 1227 const uint8* src_u, 1228 const uint8* src_v, 1229 uint8* dst_argb4444, 1230 int width) { 1231 uint8 b0; 1232 uint8 g0; 1233 uint8 r0; 1234 uint8 b1; 1235 uint8 g1; 1236 uint8 r1; 1237 int x; 1238 for (x = 0; x < width - 1; x += 2) { 1239 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1240 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); 1241 b0 = b0 >> 4; 1242 g0 = g0 >> 4; 1243 r0 = r0 >> 4; 1244 b1 = b1 >> 4; 1245 g1 = g1 >> 4; 1246 r1 = r1 >> 4; 1247 *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 1248 (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; 1249 src_y += 2; 1250 src_u += 1; 1251 src_v += 1; 1252 dst_argb4444 += 4; // Advance 2 pixels. 1253 } 1254 if (width & 1) { 1255 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1256 b0 = b0 >> 4; 1257 g0 = g0 >> 4; 1258 r0 = r0 >> 4; 1259 *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 1260 0xf000; 1261 } 1262} 1263 1264void I422ToARGB1555Row_C(const uint8* src_y, 1265 const uint8* src_u, 1266 const uint8* src_v, 1267 uint8* dst_argb1555, 1268 int width) { 1269 uint8 b0; 1270 uint8 g0; 1271 uint8 r0; 1272 uint8 b1; 1273 uint8 g1; 1274 uint8 r1; 1275 int x; 1276 for (x = 0; x < width - 1; x += 2) { 1277 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1278 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); 1279 b0 = b0 >> 3; 1280 g0 = g0 >> 3; 1281 r0 = r0 >> 3; 1282 b1 = b1 >> 3; 1283 g1 = g1 >> 3; 1284 r1 = r1 >> 3; 1285 *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 1286 (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; 1287 src_y += 2; 1288 src_u += 1; 1289 src_v += 1; 1290 dst_argb1555 += 4; // Advance 2 pixels. 1291 } 1292 if (width & 1) { 1293 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1294 b0 = b0 >> 3; 1295 g0 = g0 >> 3; 1296 r0 = r0 >> 3; 1297 *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 1298 0x8000; 1299 } 1300} 1301 1302void I422ToRGB565Row_C(const uint8* src_y, 1303 const uint8* src_u, 1304 const uint8* src_v, 1305 uint8* dst_rgb565, 1306 int width) { 1307 uint8 b0; 1308 uint8 g0; 1309 uint8 r0; 1310 uint8 b1; 1311 uint8 g1; 1312 uint8 r1; 1313 int x; 1314 for (x = 0; x < width - 1; x += 2) { 1315 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1316 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); 1317 b0 = b0 >> 3; 1318 g0 = g0 >> 2; 1319 r0 = r0 >> 3; 1320 b1 = b1 >> 3; 1321 g1 = g1 >> 2; 1322 r1 = r1 >> 3; 1323 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1324 (b1 << 16) | (g1 << 21) | (r1 << 27); 1325 src_y += 2; 1326 src_u += 1; 1327 src_v += 1; 1328 dst_rgb565 += 4; // Advance 2 pixels. 1329 } 1330 if (width & 1) { 1331 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1332 b0 = b0 >> 3; 1333 g0 = g0 >> 2; 1334 r0 = r0 >> 3; 1335 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1336 } 1337} 1338 1339void I411ToARGBRow_C(const uint8* src_y, 1340 const uint8* src_u, 1341 const uint8* src_v, 1342 uint8* rgb_buf, 1343 int width) { 1344 int x; 1345 for (x = 0; x < width - 3; x += 4) { 1346 YuvPixel(src_y[0], src_u[0], src_v[0], 1347 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1348 rgb_buf[3] = 255; 1349 YuvPixel(src_y[1], src_u[0], src_v[0], 1350 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1351 rgb_buf[7] = 255; 1352 YuvPixel(src_y[2], src_u[0], src_v[0], 1353 rgb_buf + 8, rgb_buf + 9, rgb_buf + 10); 1354 rgb_buf[11] = 255; 1355 YuvPixel(src_y[3], src_u[0], src_v[0], 1356 rgb_buf + 12, rgb_buf + 13, rgb_buf + 14); 1357 rgb_buf[15] = 255; 1358 src_y += 4; 1359 src_u += 1; 1360 src_v += 1; 1361 rgb_buf += 16; // Advance 4 pixels. 1362 } 1363 if (width & 2) { 1364 YuvPixel(src_y[0], src_u[0], src_v[0], 1365 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1366 rgb_buf[3] = 255; 1367 YuvPixel(src_y[1], src_u[0], src_v[0], 1368 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1369 rgb_buf[7] = 255; 1370 src_y += 2; 1371 rgb_buf += 8; // Advance 2 pixels. 1372 } 1373 if (width & 1) { 1374 YuvPixel(src_y[0], src_u[0], src_v[0], 1375 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1376 rgb_buf[3] = 255; 1377 } 1378} 1379 1380void NV12ToARGBRow_C(const uint8* src_y, 1381 const uint8* src_uv, 1382 uint8* rgb_buf, 1383 int width) { 1384 int x; 1385 for (x = 0; x < width - 1; x += 2) { 1386 YuvPixel(src_y[0], src_uv[0], src_uv[1], 1387 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1388 rgb_buf[3] = 255; 1389 YuvPixel(src_y[1], src_uv[0], src_uv[1], 1390 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1391 rgb_buf[7] = 255; 1392 src_y += 2; 1393 src_uv += 2; 1394 rgb_buf += 8; // Advance 2 pixels. 1395 } 1396 if (width & 1) { 1397 YuvPixel(src_y[0], src_uv[0], src_uv[1], 1398 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1399 rgb_buf[3] = 255; 1400 } 1401} 1402 1403void NV21ToARGBRow_C(const uint8* src_y, 1404 const uint8* src_vu, 1405 uint8* rgb_buf, 1406 int width) { 1407 int x; 1408 for (x = 0; x < width - 1; x += 2) { 1409 YuvPixel(src_y[0], src_vu[1], src_vu[0], 1410 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1411 rgb_buf[3] = 255; 1412 1413 YuvPixel(src_y[1], src_vu[1], src_vu[0], 1414 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1415 rgb_buf[7] = 255; 1416 1417 src_y += 2; 1418 src_vu += 2; 1419 rgb_buf += 8; // Advance 2 pixels. 1420 } 1421 if (width & 1) { 1422 YuvPixel(src_y[0], src_vu[1], src_vu[0], 1423 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1424 rgb_buf[3] = 255; 1425 } 1426} 1427 1428void NV12ToRGB565Row_C(const uint8* src_y, 1429 const uint8* src_uv, 1430 uint8* dst_rgb565, 1431 int width) { 1432 uint8 b0; 1433 uint8 g0; 1434 uint8 r0; 1435 uint8 b1; 1436 uint8 g1; 1437 uint8 r1; 1438 int x; 1439 for (x = 0; x < width - 1; x += 2) { 1440 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0); 1441 YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1); 1442 b0 = b0 >> 3; 1443 g0 = g0 >> 2; 1444 r0 = r0 >> 3; 1445 b1 = b1 >> 3; 1446 g1 = g1 >> 2; 1447 r1 = r1 >> 3; 1448 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1449 (b1 << 16) | (g1 << 21) | (r1 << 27); 1450 src_y += 2; 1451 src_uv += 2; 1452 dst_rgb565 += 4; // Advance 2 pixels. 1453 } 1454 if (width & 1) { 1455 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0); 1456 b0 = b0 >> 3; 1457 g0 = g0 >> 2; 1458 r0 = r0 >> 3; 1459 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1460 } 1461} 1462 1463void NV21ToRGB565Row_C(const uint8* src_y, 1464 const uint8* vsrc_u, 1465 uint8* dst_rgb565, 1466 int width) { 1467 uint8 b0; 1468 uint8 g0; 1469 uint8 r0; 1470 uint8 b1; 1471 uint8 g1; 1472 uint8 r1; 1473 int x; 1474 for (x = 0; x < width - 1; x += 2) { 1475 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); 1476 YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1); 1477 b0 = b0 >> 3; 1478 g0 = g0 >> 2; 1479 r0 = r0 >> 3; 1480 b1 = b1 >> 3; 1481 g1 = g1 >> 2; 1482 r1 = r1 >> 3; 1483 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1484 (b1 << 16) | (g1 << 21) | (r1 << 27); 1485 src_y += 2; 1486 vsrc_u += 2; 1487 dst_rgb565 += 4; // Advance 2 pixels. 1488 } 1489 if (width & 1) { 1490 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); 1491 b0 = b0 >> 3; 1492 g0 = g0 >> 2; 1493 r0 = r0 >> 3; 1494 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1495 } 1496} 1497 1498void YUY2ToARGBRow_C(const uint8* src_yuy2, 1499 uint8* rgb_buf, 1500 int width) { 1501 int x; 1502 for (x = 0; x < width - 1; x += 2) { 1503 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], 1504 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1505 rgb_buf[3] = 255; 1506 YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], 1507 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1508 rgb_buf[7] = 255; 1509 src_yuy2 += 4; 1510 rgb_buf += 8; // Advance 2 pixels. 1511 } 1512 if (width & 1) { 1513 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], 1514 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1515 rgb_buf[3] = 255; 1516 } 1517} 1518 1519void UYVYToARGBRow_C(const uint8* src_uyvy, 1520 uint8* rgb_buf, 1521 int width) { 1522 int x; 1523 for (x = 0; x < width - 1; x += 2) { 1524 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], 1525 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1526 rgb_buf[3] = 255; 1527 YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], 1528 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1529 rgb_buf[7] = 255; 1530 src_uyvy += 4; 1531 rgb_buf += 8; // Advance 2 pixels. 1532 } 1533 if (width & 1) { 1534 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], 1535 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1536 rgb_buf[3] = 255; 1537 } 1538} 1539 1540void I422ToBGRARow_C(const uint8* src_y, 1541 const uint8* src_u, 1542 const uint8* src_v, 1543 uint8* rgb_buf, 1544 int width) { 1545 int x; 1546 for (x = 0; x < width - 1; x += 2) { 1547 YuvPixel(src_y[0], src_u[0], src_v[0], 1548 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); 1549 rgb_buf[0] = 255; 1550 YuvPixel(src_y[1], src_u[0], src_v[0], 1551 rgb_buf + 7, rgb_buf + 6, rgb_buf + 5); 1552 rgb_buf[4] = 255; 1553 src_y += 2; 1554 src_u += 1; 1555 src_v += 1; 1556 rgb_buf += 8; // Advance 2 pixels. 1557 } 1558 if (width & 1) { 1559 YuvPixel(src_y[0], src_u[0], src_v[0], 1560 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); 1561 rgb_buf[0] = 255; 1562 } 1563} 1564 1565void I422ToABGRRow_C(const uint8* src_y, 1566 const uint8* src_u, 1567 const uint8* src_v, 1568 uint8* rgb_buf, 1569 int width) { 1570 int x; 1571 for (x = 0; x < width - 1; x += 2) { 1572 YuvPixel(src_y[0], src_u[0], src_v[0], 1573 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1574 rgb_buf[3] = 255; 1575 YuvPixel(src_y[1], src_u[0], src_v[0], 1576 rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); 1577 rgb_buf[7] = 255; 1578 src_y += 2; 1579 src_u += 1; 1580 src_v += 1; 1581 rgb_buf += 8; // Advance 2 pixels. 1582 } 1583 if (width & 1) { 1584 YuvPixel(src_y[0], src_u[0], src_v[0], 1585 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1586 rgb_buf[3] = 255; 1587 } 1588} 1589 1590void I422ToRGBARow_C(const uint8* src_y, 1591 const uint8* src_u, 1592 const uint8* src_v, 1593 uint8* rgb_buf, 1594 int width) { 1595 int x; 1596 for (x = 0; x < width - 1; x += 2) { 1597 YuvPixel(src_y[0], src_u[0], src_v[0], 1598 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); 1599 rgb_buf[0] = 255; 1600 YuvPixel(src_y[1], src_u[0], src_v[0], 1601 rgb_buf + 5, rgb_buf + 6, rgb_buf + 7); 1602 rgb_buf[4] = 255; 1603 src_y += 2; 1604 src_u += 1; 1605 src_v += 1; 1606 rgb_buf += 8; // Advance 2 pixels. 1607 } 1608 if (width & 1) { 1609 YuvPixel(src_y[0], src_u[0], src_v[0], 1610 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); 1611 rgb_buf[0] = 255; 1612 } 1613} 1614 1615void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { 1616 int x; 1617 for (x = 0; x < width - 1; x += 2) { 1618 YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1619 rgb_buf[3] = 255; 1620 YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1621 rgb_buf[7] = 255; 1622 src_y += 2; 1623 rgb_buf += 8; // Advance 2 pixels. 1624 } 1625 if (width & 1) { 1626 YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1627 rgb_buf[3] = 255; 1628 } 1629} 1630 1631void MirrorRow_C(const uint8* src, uint8* dst, int width) { 1632 int x; 1633 src += width - 1; 1634 for (x = 0; x < width - 1; x += 2) { 1635 dst[x] = src[0]; 1636 dst[x + 1] = src[-1]; 1637 src -= 2; 1638 } 1639 if (width & 1) { 1640 dst[width - 1] = src[0]; 1641 } 1642} 1643 1644void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1645 int x; 1646 src_uv += (width - 1) << 1; 1647 for (x = 0; x < width - 1; x += 2) { 1648 dst_u[x] = src_uv[0]; 1649 dst_u[x + 1] = src_uv[-2]; 1650 dst_v[x] = src_uv[1]; 1651 dst_v[x + 1] = src_uv[-2 + 1]; 1652 src_uv -= 4; 1653 } 1654 if (width & 1) { 1655 dst_u[width - 1] = src_uv[0]; 1656 dst_v[width - 1] = src_uv[1]; 1657 } 1658} 1659 1660void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { 1661 int x; 1662 const uint32* src32 = (const uint32*)(src); 1663 uint32* dst32 = (uint32*)(dst); 1664 src32 += width - 1; 1665 for (x = 0; x < width - 1; x += 2) { 1666 dst32[x] = src32[0]; 1667 dst32[x + 1] = src32[-1]; 1668 src32 -= 2; 1669 } 1670 if (width & 1) { 1671 dst32[width - 1] = src32[0]; 1672 } 1673} 1674 1675void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1676 int x; 1677 for (x = 0; x < width - 1; x += 2) { 1678 dst_u[x] = src_uv[0]; 1679 dst_u[x + 1] = src_uv[2]; 1680 dst_v[x] = src_uv[1]; 1681 dst_v[x + 1] = src_uv[3]; 1682 src_uv += 4; 1683 } 1684 if (width & 1) { 1685 dst_u[width - 1] = src_uv[0]; 1686 dst_v[width - 1] = src_uv[1]; 1687 } 1688} 1689 1690void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 1691 int width) { 1692 int x; 1693 for (x = 0; x < width - 1; x += 2) { 1694 dst_uv[0] = src_u[x]; 1695 dst_uv[1] = src_v[x]; 1696 dst_uv[2] = src_u[x + 1]; 1697 dst_uv[3] = src_v[x + 1]; 1698 dst_uv += 4; 1699 } 1700 if (width & 1) { 1701 dst_uv[0] = src_u[width - 1]; 1702 dst_uv[1] = src_v[width - 1]; 1703 } 1704} 1705 1706void CopyRow_C(const uint8* src, uint8* dst, int count) { 1707 memcpy(dst, src, count); 1708} 1709 1710void CopyRow_16_C(const uint16* src, uint16* dst, int count) { 1711 memcpy(dst, src, count * 2); 1712} 1713 1714void SetRow_C(uint8* dst, uint8 v8, int width) { 1715 memset(dst, v8, width); 1716} 1717 1718void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) { 1719 uint32* d = (uint32*)(dst_argb); 1720 int x; 1721 for (x = 0; x < width; ++x) { 1722 d[x] = v32; 1723 } 1724} 1725 1726// Filter 2 rows of YUY2 UV's (422) into U and V (420). 1727void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, 1728 uint8* dst_u, uint8* dst_v, int width) { 1729 // Output a row of UV values, filtering 2 rows of YUY2. 1730 int x; 1731 for (x = 0; x < width; x += 2) { 1732 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; 1733 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; 1734 src_yuy2 += 4; 1735 dst_u += 1; 1736 dst_v += 1; 1737 } 1738} 1739 1740// Copy row of YUY2 UV's (422) into U and V (422). 1741void YUY2ToUV422Row_C(const uint8* src_yuy2, 1742 uint8* dst_u, uint8* dst_v, int width) { 1743 // Output a row of UV values. 1744 int x; 1745 for (x = 0; x < width; x += 2) { 1746 dst_u[0] = src_yuy2[1]; 1747 dst_v[0] = src_yuy2[3]; 1748 src_yuy2 += 4; 1749 dst_u += 1; 1750 dst_v += 1; 1751 } 1752} 1753 1754// Copy row of YUY2 Y's (422) into Y (420/422). 1755void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { 1756 // Output a row of Y values. 1757 int x; 1758 for (x = 0; x < width - 1; x += 2) { 1759 dst_y[x] = src_yuy2[0]; 1760 dst_y[x + 1] = src_yuy2[2]; 1761 src_yuy2 += 4; 1762 } 1763 if (width & 1) { 1764 dst_y[width - 1] = src_yuy2[0]; 1765 } 1766} 1767 1768// Filter 2 rows of UYVY UV's (422) into U and V (420). 1769void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy, 1770 uint8* dst_u, uint8* dst_v, int width) { 1771 // Output a row of UV values. 1772 int x; 1773 for (x = 0; x < width; x += 2) { 1774 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1; 1775 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1; 1776 src_uyvy += 4; 1777 dst_u += 1; 1778 dst_v += 1; 1779 } 1780} 1781 1782// Copy row of UYVY UV's (422) into U and V (422). 1783void UYVYToUV422Row_C(const uint8* src_uyvy, 1784 uint8* dst_u, uint8* dst_v, int width) { 1785 // Output a row of UV values. 1786 int x; 1787 for (x = 0; x < width; x += 2) { 1788 dst_u[0] = src_uyvy[0]; 1789 dst_v[0] = src_uyvy[2]; 1790 src_uyvy += 4; 1791 dst_u += 1; 1792 dst_v += 1; 1793 } 1794} 1795 1796// Copy row of UYVY Y's (422) into Y (420/422). 1797void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { 1798 // Output a row of Y values. 1799 int x; 1800 for (x = 0; x < width - 1; x += 2) { 1801 dst_y[x] = src_uyvy[1]; 1802 dst_y[x + 1] = src_uyvy[3]; 1803 src_uyvy += 4; 1804 } 1805 if (width & 1) { 1806 dst_y[width - 1] = src_uyvy[1]; 1807 } 1808} 1809 1810#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f 1811 1812// Blend src_argb0 over src_argb1 and store to dst_argb. 1813// dst_argb may be src_argb0 or src_argb1. 1814// This code mimics the SSSE3 version for better testability. 1815void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, 1816 uint8* dst_argb, int width) { 1817 int x; 1818 for (x = 0; x < width - 1; x += 2) { 1819 uint32 fb = src_argb0[0]; 1820 uint32 fg = src_argb0[1]; 1821 uint32 fr = src_argb0[2]; 1822 uint32 a = src_argb0[3]; 1823 uint32 bb = src_argb1[0]; 1824 uint32 bg = src_argb1[1]; 1825 uint32 br = src_argb1[2]; 1826 dst_argb[0] = BLEND(fb, bb, a); 1827 dst_argb[1] = BLEND(fg, bg, a); 1828 dst_argb[2] = BLEND(fr, br, a); 1829 dst_argb[3] = 255u; 1830 1831 fb = src_argb0[4 + 0]; 1832 fg = src_argb0[4 + 1]; 1833 fr = src_argb0[4 + 2]; 1834 a = src_argb0[4 + 3]; 1835 bb = src_argb1[4 + 0]; 1836 bg = src_argb1[4 + 1]; 1837 br = src_argb1[4 + 2]; 1838 dst_argb[4 + 0] = BLEND(fb, bb, a); 1839 dst_argb[4 + 1] = BLEND(fg, bg, a); 1840 dst_argb[4 + 2] = BLEND(fr, br, a); 1841 dst_argb[4 + 3] = 255u; 1842 src_argb0 += 8; 1843 src_argb1 += 8; 1844 dst_argb += 8; 1845 } 1846 1847 if (width & 1) { 1848 uint32 fb = src_argb0[0]; 1849 uint32 fg = src_argb0[1]; 1850 uint32 fr = src_argb0[2]; 1851 uint32 a = src_argb0[3]; 1852 uint32 bb = src_argb1[0]; 1853 uint32 bg = src_argb1[1]; 1854 uint32 br = src_argb1[2]; 1855 dst_argb[0] = BLEND(fb, bb, a); 1856 dst_argb[1] = BLEND(fg, bg, a); 1857 dst_argb[2] = BLEND(fr, br, a); 1858 dst_argb[3] = 255u; 1859 } 1860} 1861#undef BLEND 1862#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 1863 1864// Multiply source RGB by alpha and store to destination. 1865// This code mimics the SSSE3 version for better testability. 1866void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 1867 int i; 1868 for (i = 0; i < width - 1; i += 2) { 1869 uint32 b = src_argb[0]; 1870 uint32 g = src_argb[1]; 1871 uint32 r = src_argb[2]; 1872 uint32 a = src_argb[3]; 1873 dst_argb[0] = ATTENUATE(b, a); 1874 dst_argb[1] = ATTENUATE(g, a); 1875 dst_argb[2] = ATTENUATE(r, a); 1876 dst_argb[3] = a; 1877 b = src_argb[4]; 1878 g = src_argb[5]; 1879 r = src_argb[6]; 1880 a = src_argb[7]; 1881 dst_argb[4] = ATTENUATE(b, a); 1882 dst_argb[5] = ATTENUATE(g, a); 1883 dst_argb[6] = ATTENUATE(r, a); 1884 dst_argb[7] = a; 1885 src_argb += 8; 1886 dst_argb += 8; 1887 } 1888 1889 if (width & 1) { 1890 const uint32 b = src_argb[0]; 1891 const uint32 g = src_argb[1]; 1892 const uint32 r = src_argb[2]; 1893 const uint32 a = src_argb[3]; 1894 dst_argb[0] = ATTENUATE(b, a); 1895 dst_argb[1] = ATTENUATE(g, a); 1896 dst_argb[2] = ATTENUATE(r, a); 1897 dst_argb[3] = a; 1898 } 1899} 1900#undef ATTENUATE 1901 1902// Divide source RGB by alpha and store to destination. 1903// b = (b * 255 + (a / 2)) / a; 1904// g = (g * 255 + (a / 2)) / a; 1905// r = (r * 255 + (a / 2)) / a; 1906// Reciprocal method is off by 1 on some values. ie 125 1907// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. 1908#define T(a) 0x01000000 + (0x10000 / a) 1909const uint32 fixed_invtbl8[256] = { 1910 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), 1911 T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), 1912 T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), 1913 T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), 1914 T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), 1915 T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), 1916 T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), 1917 T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f), 1918 T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47), 1919 T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f), 1920 T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57), 1921 T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), 1922 T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), 1923 T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), 1924 T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77), 1925 T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f), 1926 T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87), 1927 T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f), 1928 T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), 1929 T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), 1930 T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), 1931 T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf), 1932 T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7), 1933 T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf), 1934 T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7), 1935 T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), 1936 T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), 1937 T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), 1938 T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7), 1939 T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef), 1940 T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7), 1941 T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 }; 1942#undef T 1943 1944void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 1945 int i; 1946 for (i = 0; i < width; ++i) { 1947 uint32 b = src_argb[0]; 1948 uint32 g = src_argb[1]; 1949 uint32 r = src_argb[2]; 1950 const uint32 a = src_argb[3]; 1951 const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point 1952 b = (b * ia) >> 8; 1953 g = (g * ia) >> 8; 1954 r = (r * ia) >> 8; 1955 // Clamping should not be necessary but is free in assembly. 1956 dst_argb[0] = clamp255(b); 1957 dst_argb[1] = clamp255(g); 1958 dst_argb[2] = clamp255(r); 1959 dst_argb[3] = a; 1960 src_argb += 4; 1961 dst_argb += 4; 1962 } 1963} 1964 1965void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, 1966 const int32* previous_cumsum, int width) { 1967 int32 row_sum[4] = {0, 0, 0, 0}; 1968 int x; 1969 for (x = 0; x < width; ++x) { 1970 row_sum[0] += row[x * 4 + 0]; 1971 row_sum[1] += row[x * 4 + 1]; 1972 row_sum[2] += row[x * 4 + 2]; 1973 row_sum[3] += row[x * 4 + 3]; 1974 cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; 1975 cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; 1976 cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; 1977 cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; 1978 } 1979} 1980 1981void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl, 1982 int w, int area, uint8* dst, int count) { 1983 float ooa = 1.0f / area; 1984 int i; 1985 for (i = 0; i < count; ++i) { 1986 dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); 1987 dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); 1988 dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); 1989 dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); 1990 dst += 4; 1991 tl += 4; 1992 bl += 4; 1993 } 1994} 1995 1996// Copy pixels from rotated source to destination row with a slope. 1997LIBYUV_API 1998void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, 1999 uint8* dst_argb, const float* uv_dudv, int width) { 2000 int i; 2001 // Render a row of pixels from source into a buffer. 2002 float uv[2]; 2003 uv[0] = uv_dudv[0]; 2004 uv[1] = uv_dudv[1]; 2005 for (i = 0; i < width; ++i) { 2006 int x = (int)(uv[0]); 2007 int y = (int)(uv[1]); 2008 *(uint32*)(dst_argb) = 2009 *(const uint32*)(src_argb + y * src_argb_stride + 2010 x * 4); 2011 dst_argb += 4; 2012 uv[0] += uv_dudv[2]; 2013 uv[1] += uv_dudv[3]; 2014 } 2015} 2016 2017// Blend 2 rows into 1. 2018static void HalfRow_C(const uint8* src_uv, int src_uv_stride, 2019 uint8* dst_uv, int pix) { 2020 int x; 2021 for (x = 0; x < pix; ++x) { 2022 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; 2023 } 2024} 2025 2026static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride, 2027 uint16* dst_uv, int pix) { 2028 int x; 2029 for (x = 0; x < pix; ++x) { 2030 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; 2031 } 2032} 2033 2034// C version 2x2 -> 2x1. 2035void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, 2036 ptrdiff_t src_stride, 2037 int width, int source_y_fraction) { 2038 int y1_fraction = source_y_fraction; 2039 int y0_fraction = 256 - y1_fraction; 2040 const uint8* src_ptr1 = src_ptr + src_stride; 2041 int x; 2042 if (source_y_fraction == 0) { 2043 memcpy(dst_ptr, src_ptr, width); 2044 return; 2045 } 2046 if (source_y_fraction == 128) { 2047 HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width); 2048 return; 2049 } 2050 for (x = 0; x < width - 1; x += 2) { 2051 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 2052 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; 2053 src_ptr += 2; 2054 src_ptr1 += 2; 2055 dst_ptr += 2; 2056 } 2057 if (width & 1) { 2058 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 2059 } 2060} 2061 2062void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, 2063 ptrdiff_t src_stride, 2064 int width, int source_y_fraction) { 2065 int y1_fraction = source_y_fraction; 2066 int y0_fraction = 256 - y1_fraction; 2067 const uint16* src_ptr1 = src_ptr + src_stride; 2068 int x; 2069 if (source_y_fraction == 0) { 2070 memcpy(dst_ptr, src_ptr, width * 2); 2071 return; 2072 } 2073 if (source_y_fraction == 128) { 2074 HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width); 2075 return; 2076 } 2077 for (x = 0; x < width - 1; x += 2) { 2078 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 2079 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; 2080 src_ptr += 2; 2081 src_ptr1 += 2; 2082 dst_ptr += 2; 2083 } 2084 if (width & 1) { 2085 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 2086 } 2087} 2088 2089// Use first 4 shuffler values to reorder ARGB channels. 2090void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, 2091 const uint8* shuffler, int pix) { 2092 int index0 = shuffler[0]; 2093 int index1 = shuffler[1]; 2094 int index2 = shuffler[2]; 2095 int index3 = shuffler[3]; 2096 // Shuffle a row of ARGB. 2097 int x; 2098 for (x = 0; x < pix; ++x) { 2099 // To support in-place conversion. 2100 uint8 b = src_argb[index0]; 2101 uint8 g = src_argb[index1]; 2102 uint8 r = src_argb[index2]; 2103 uint8 a = src_argb[index3]; 2104 dst_argb[0] = b; 2105 dst_argb[1] = g; 2106 dst_argb[2] = r; 2107 dst_argb[3] = a; 2108 src_argb += 4; 2109 dst_argb += 4; 2110 } 2111} 2112 2113void I422ToYUY2Row_C(const uint8* src_y, 2114 const uint8* src_u, 2115 const uint8* src_v, 2116 uint8* dst_frame, int width) { 2117 int x; 2118 for (x = 0; x < width - 1; x += 2) { 2119 dst_frame[0] = src_y[0]; 2120 dst_frame[1] = src_u[0]; 2121 dst_frame[2] = src_y[1]; 2122 dst_frame[3] = src_v[0]; 2123 dst_frame += 4; 2124 src_y += 2; 2125 src_u += 1; 2126 src_v += 1; 2127 } 2128 if (width & 1) { 2129 dst_frame[0] = src_y[0]; 2130 dst_frame[1] = src_u[0]; 2131 dst_frame[2] = 0; 2132 dst_frame[3] = src_v[0]; 2133 } 2134} 2135 2136void I422ToUYVYRow_C(const uint8* src_y, 2137 const uint8* src_u, 2138 const uint8* src_v, 2139 uint8* dst_frame, int width) { 2140 int x; 2141 for (x = 0; x < width - 1; x += 2) { 2142 dst_frame[0] = src_u[0]; 2143 dst_frame[1] = src_y[0]; 2144 dst_frame[2] = src_v[0]; 2145 dst_frame[3] = src_y[1]; 2146 dst_frame += 4; 2147 src_y += 2; 2148 src_u += 1; 2149 src_v += 1; 2150 } 2151 if (width & 1) { 2152 dst_frame[0] = src_u[0]; 2153 dst_frame[1] = src_y[0]; 2154 dst_frame[2] = src_v[0]; 2155 dst_frame[3] = 0; 2156 } 2157} 2158 2159// Maximum temporary width for wrappers to process at a time, in pixels. 2160#define MAXTWIDTH 2048 2161 2162#if !(defined(_MSC_VER) && !defined(__clang__)) && \ 2163 defined(HAS_I422TORGB565ROW_SSSE3) 2164// row_win.cc has asm version, but GCC uses 2 step wrapper. 2165void I422ToRGB565Row_SSSE3(const uint8* src_y, 2166 const uint8* src_u, 2167 const uint8* src_v, 2168 uint8* dst_rgb565, 2169 int width) { 2170 SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); 2171 while (width > 0) { 2172 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2173 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); 2174 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); 2175 src_y += twidth; 2176 src_u += twidth / 2; 2177 src_v += twidth / 2; 2178 dst_rgb565 += twidth * 2; 2179 width -= twidth; 2180 } 2181} 2182#endif 2183 2184#if defined(HAS_I422TOARGB1555ROW_SSSE3) 2185void I422ToARGB1555Row_SSSE3(const uint8* src_y, 2186 const uint8* src_u, 2187 const uint8* src_v, 2188 uint8* dst_argb1555, 2189 int width) { 2190 // Row buffer for intermediate ARGB pixels. 2191 SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); 2192 while (width > 0) { 2193 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2194 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); 2195 ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); 2196 src_y += twidth; 2197 src_u += twidth / 2; 2198 src_v += twidth / 2; 2199 dst_argb1555 += twidth * 2; 2200 width -= twidth; 2201 } 2202} 2203#endif 2204 2205#if defined(HAS_I422TOARGB4444ROW_SSSE3) 2206void I422ToARGB4444Row_SSSE3(const uint8* src_y, 2207 const uint8* src_u, 2208 const uint8* src_v, 2209 uint8* dst_argb4444, 2210 int width) { 2211 // Row buffer for intermediate ARGB pixels. 2212 SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); 2213 while (width > 0) { 2214 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2215 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth); 2216 ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); 2217 src_y += twidth; 2218 src_u += twidth / 2; 2219 src_v += twidth / 2; 2220 dst_argb4444 += twidth * 2; 2221 width -= twidth; 2222 } 2223} 2224#endif 2225 2226#if defined(HAS_NV12TORGB565ROW_SSSE3) 2227void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv, 2228 uint8* dst_rgb565, int width) { 2229 // Row buffer for intermediate ARGB pixels. 2230 SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); 2231 while (width > 0) { 2232 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2233 NV12ToARGBRow_SSSE3(src_y, src_uv, row, twidth); 2234 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); 2235 src_y += twidth; 2236 src_uv += twidth; 2237 dst_rgb565 += twidth * 2; 2238 width -= twidth; 2239 } 2240} 2241#endif 2242 2243#if defined(HAS_NV21TORGB565ROW_SSSE3) 2244void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu, 2245 uint8* dst_rgb565, int width) { 2246 // Row buffer for intermediate ARGB pixels. 2247 SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); 2248 while (width > 0) { 2249 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2250 NV21ToARGBRow_SSSE3(src_y, src_vu, row, twidth); 2251 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); 2252 src_y += twidth; 2253 src_vu += twidth; 2254 dst_rgb565 += twidth * 2; 2255 width -= twidth; 2256 } 2257} 2258#endif 2259 2260#if defined(HAS_YUY2TOARGBROW_SSSE3) 2261void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) { 2262 // Row buffers for intermediate YUV pixels. 2263 SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); 2264 SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); 2265 SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); 2266 while (width > 0) { 2267 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2268 YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth); 2269 YUY2ToYRow_SSE2(src_yuy2, row_y, twidth); 2270 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth); 2271 src_yuy2 += twidth * 2; 2272 dst_argb += twidth * 4; 2273 width -= twidth; 2274 } 2275} 2276#endif 2277 2278#if defined(HAS_UYVYTOARGBROW_SSSE3) 2279void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) { 2280 // Row buffers for intermediate YUV pixels. 2281 SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]); 2282 SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]); 2283 SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]); 2284 while (width > 0) { 2285 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2286 UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth); 2287 UYVYToYRow_SSE2(src_uyvy, row_y, twidth); 2288 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth); 2289 src_uyvy += twidth * 2; 2290 dst_argb += twidth * 4; 2291 width -= twidth; 2292 } 2293} 2294#endif // !defined(LIBYUV_DISABLE_X86) 2295 2296#if defined(HAS_I422TORGB565ROW_AVX2) 2297void I422ToRGB565Row_AVX2(const uint8* src_y, 2298 const uint8* src_u, 2299 const uint8* src_v, 2300 uint8* dst_rgb565, 2301 int width) { 2302 SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); 2303 while (width > 0) { 2304 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2305 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); 2306 ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); 2307 src_y += twidth; 2308 src_u += twidth / 2; 2309 src_v += twidth / 2; 2310 dst_rgb565 += twidth * 2; 2311 width -= twidth; 2312 } 2313} 2314#endif 2315 2316#if defined(HAS_I422TOARGB1555ROW_AVX2) 2317void I422ToARGB1555Row_AVX2(const uint8* src_y, 2318 const uint8* src_u, 2319 const uint8* src_v, 2320 uint8* dst_argb1555, 2321 int width) { 2322 // Row buffer for intermediate ARGB pixels. 2323 SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); 2324 while (width > 0) { 2325 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2326 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); 2327 ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth); 2328 src_y += twidth; 2329 src_u += twidth / 2; 2330 src_v += twidth / 2; 2331 dst_argb1555 += twidth * 2; 2332 width -= twidth; 2333 } 2334} 2335#endif 2336 2337#if defined(HAS_I422TOARGB4444ROW_AVX2) 2338void I422ToARGB4444Row_AVX2(const uint8* src_y, 2339 const uint8* src_u, 2340 const uint8* src_v, 2341 uint8* dst_argb4444, 2342 int width) { 2343 // Row buffer for intermediate ARGB pixels. 2344 SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); 2345 while (width > 0) { 2346 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2347 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); 2348 ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth); 2349 src_y += twidth; 2350 src_u += twidth / 2; 2351 src_v += twidth / 2; 2352 dst_argb4444 += twidth * 2; 2353 width -= twidth; 2354 } 2355} 2356#endif 2357 2358#if defined(HAS_I422TORGB24ROW_AVX2) 2359void I422ToRGB24Row_AVX2(const uint8* src_y, 2360 const uint8* src_u, 2361 const uint8* src_v, 2362 uint8* dst_rgb24, 2363 int width) { 2364 // Row buffer for intermediate ARGB pixels. 2365 SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); 2366 while (width > 0) { 2367 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2368 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); 2369 // TODO(fbarchard): ARGBToRGB24Row_AVX2 2370 ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); 2371 src_y += twidth; 2372 src_u += twidth / 2; 2373 src_v += twidth / 2; 2374 dst_rgb24 += twidth * 3; 2375 width -= twidth; 2376 } 2377} 2378#endif 2379 2380#if defined(HAS_I422TORAWROW_AVX2) 2381void I422ToRAWRow_AVX2(const uint8* src_y, 2382 const uint8* src_u, 2383 const uint8* src_v, 2384 uint8* dst_raw, 2385 int width) { 2386 // Row buffer for intermediate ARGB pixels. 2387 SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); 2388 while (width > 0) { 2389 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2390 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth); 2391 // TODO(fbarchard): ARGBToRAWRow_AVX2 2392 ARGBToRAWRow_SSSE3(row, dst_raw, twidth); 2393 src_y += twidth; 2394 src_u += twidth / 2; 2395 src_v += twidth / 2; 2396 dst_raw += twidth * 3; 2397 width -= twidth; 2398 } 2399} 2400#endif 2401 2402#if defined(HAS_NV12TORGB565ROW_AVX2) 2403void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv, 2404 uint8* dst_rgb565, int width) { 2405 // Row buffer for intermediate ARGB pixels. 2406 SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); 2407 while (width > 0) { 2408 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2409 NV12ToARGBRow_AVX2(src_y, src_uv, row, twidth); 2410 ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); 2411 src_y += twidth; 2412 src_uv += twidth; 2413 dst_rgb565 += twidth * 2; 2414 width -= twidth; 2415 } 2416} 2417#endif 2418 2419#if defined(HAS_NV21TORGB565ROW_AVX2) 2420void NV21ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_vu, 2421 uint8* dst_rgb565, int width) { 2422 // Row buffer for intermediate ARGB pixels. 2423 SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]); 2424 while (width > 0) { 2425 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2426 NV21ToARGBRow_AVX2(src_y, src_vu, row, twidth); 2427 ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); 2428 src_y += twidth; 2429 src_vu += twidth; 2430 dst_rgb565 += twidth * 2; 2431 width -= twidth; 2432 } 2433} 2434#endif 2435 2436#if defined(HAS_YUY2TOARGBROW_AVX2) 2437void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) { 2438 // Row buffers for intermediate YUV pixels. 2439 SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); 2440 SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); 2441 SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); 2442 while (width > 0) { 2443 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2444 YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth); 2445 YUY2ToYRow_AVX2(src_yuy2, row_y, twidth); 2446 I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth); 2447 src_yuy2 += twidth * 2; 2448 dst_argb += twidth * 4; 2449 width -= twidth; 2450 } 2451} 2452#endif 2453 2454#if defined(HAS_UYVYTOARGBROW_AVX2) 2455void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) { 2456 // Row buffers for intermediate YUV pixels. 2457 SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]); 2458 SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]); 2459 SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]); 2460 while (width > 0) { 2461 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; 2462 UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth); 2463 UYVYToYRow_AVX2(src_uyvy, row_y, twidth); 2464 I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth); 2465 src_uyvy += twidth * 2; 2466 dst_argb += twidth * 4; 2467 width -= twidth; 2468 } 2469} 2470#endif // !defined(LIBYUV_DISABLE_X86) 2471 2472void ARGBPolynomialRow_C(const uint8* src_argb, 2473 uint8* dst_argb, const float* poly, 2474 int width) { 2475 int i; 2476 for (i = 0; i < width; ++i) { 2477 float b = (float)(src_argb[0]); 2478 float g = (float)(src_argb[1]); 2479 float r = (float)(src_argb[2]); 2480 float a = (float)(src_argb[3]); 2481 float b2 = b * b; 2482 float g2 = g * g; 2483 float r2 = r * r; 2484 float a2 = a * a; 2485 float db = poly[0] + poly[4] * b; 2486 float dg = poly[1] + poly[5] * g; 2487 float dr = poly[2] + poly[6] * r; 2488 float da = poly[3] + poly[7] * a; 2489 float b3 = b2 * b; 2490 float g3 = g2 * g; 2491 float r3 = r2 * r; 2492 float a3 = a2 * a; 2493 db += poly[8] * b2; 2494 dg += poly[9] * g2; 2495 dr += poly[10] * r2; 2496 da += poly[11] * a2; 2497 db += poly[12] * b3; 2498 dg += poly[13] * g3; 2499 dr += poly[14] * r3; 2500 da += poly[15] * a3; 2501 2502 dst_argb[0] = Clamp((int32)(db)); 2503 dst_argb[1] = Clamp((int32)(dg)); 2504 dst_argb[2] = Clamp((int32)(dr)); 2505 dst_argb[3] = Clamp((int32)(da)); 2506 src_argb += 4; 2507 dst_argb += 4; 2508 } 2509} 2510 2511void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, 2512 const uint8* luma, uint32 lumacoeff) { 2513 uint32 bc = lumacoeff & 0xff; 2514 uint32 gc = (lumacoeff >> 8) & 0xff; 2515 uint32 rc = (lumacoeff >> 16) & 0xff; 2516 2517 int i; 2518 for (i = 0; i < width - 1; i += 2) { 2519 // Luminance in rows, color values in columns. 2520 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + 2521 src_argb[2] * rc) & 0x7F00u) + luma; 2522 const uint8* luma1; 2523 dst_argb[0] = luma0[src_argb[0]]; 2524 dst_argb[1] = luma0[src_argb[1]]; 2525 dst_argb[2] = luma0[src_argb[2]]; 2526 dst_argb[3] = src_argb[3]; 2527 luma1 = ((src_argb[4] * bc + src_argb[5] * gc + 2528 src_argb[6] * rc) & 0x7F00u) + luma; 2529 dst_argb[4] = luma1[src_argb[4]]; 2530 dst_argb[5] = luma1[src_argb[5]]; 2531 dst_argb[6] = luma1[src_argb[6]]; 2532 dst_argb[7] = src_argb[7]; 2533 src_argb += 8; 2534 dst_argb += 8; 2535 } 2536 if (width & 1) { 2537 // Luminance in rows, color values in columns. 2538 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + 2539 src_argb[2] * rc) & 0x7F00u) + luma; 2540 dst_argb[0] = luma0[src_argb[0]]; 2541 dst_argb[1] = luma0[src_argb[1]]; 2542 dst_argb[2] = luma0[src_argb[2]]; 2543 dst_argb[3] = src_argb[3]; 2544 } 2545} 2546 2547void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { 2548 int i; 2549 for (i = 0; i < width - 1; i += 2) { 2550 dst[3] = src[3]; 2551 dst[7] = src[7]; 2552 dst += 8; 2553 src += 8; 2554 } 2555 if (width & 1) { 2556 dst[3] = src[3]; 2557 } 2558} 2559 2560void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { 2561 int i; 2562 for (i = 0; i < width - 1; i += 2) { 2563 dst[3] = src[0]; 2564 dst[7] = src[1]; 2565 dst += 8; 2566 src += 2; 2567 } 2568 if (width & 1) { 2569 dst[3] = src[0]; 2570 } 2571} 2572 2573#ifdef __cplusplus 2574} // extern "C" 2575} // namespace libyuv 2576#endif 2577