// enc.c revision a2415724fb3466168b2af5b08bd94ba732c0e753
1// Copyright 2011 Google Inc. All Rights Reserved. 2// 3// This code is licensed under the same terms as WebM: 4// Software License Agreement: http://www.webmproject.org/license/software/ 5// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6// ----------------------------------------------------------------------------- 7// 8// Speed-critical encoding functions. 9// 10// Author: Skal (pascal.massimino@gmail.com) 11 12#include <stdlib.h> // for abs() 13#include "./dsp.h" 14#include "../enc/vp8enci.h" 15 16#if defined(__cplusplus) || defined(c_plusplus) 17extern "C" { 18#endif 19 20static WEBP_INLINE uint8_t clip_8b(int v) { 21 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 22} 23 24static WEBP_INLINE int clip_max(int v, int max) { 25 return (v > max) ? max : v; 26} 27 28//------------------------------------------------------------------------------ 29// Compute susceptibility based on DCT-coeff histograms: 30// the higher, the "easier" the macroblock is to compress. 31 32const int VP8DspScan[16 + 4 + 4] = { 33 // Luma 34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, 38 39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V 41}; 42 43static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, 44 int start_block, int end_block, 45 VP8Histogram* const histo) { 46 int j; 47 for (j = start_block; j < end_block; ++j) { 48 int k; 49 int16_t out[16]; 50 51 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); 52 53 // Convert coefficients to bin. 54 for (k = 0; k < 16; ++k) { 55 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? 
56 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); 57 histo->distribution[clipped_value]++; 58 } 59 } 60} 61 62//------------------------------------------------------------------------------ 63// run-time tables (~4k) 64 65static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] 66 67// We declare this variable 'volatile' to prevent instruction reordering 68// and make sure it's set to true _last_ (so as to be thread-safe) 69static volatile int tables_ok = 0; 70 71static void InitTables(void) { 72 if (!tables_ok) { 73 int i; 74 for (i = -255; i <= 255 + 255; ++i) { 75 clip1[255 + i] = clip_8b(i); 76 } 77 tables_ok = 1; 78 } 79} 80 81 82//------------------------------------------------------------------------------ 83// Transforms (Paragraph 14.4) 84 85#define STORE(x, y, v) \ 86 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) 87 88static const int kC1 = 20091 + (1 << 16); 89static const int kC2 = 35468; 90#define MUL(a, b) (((a) * (b)) >> 16) 91 92static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, 93 uint8_t* dst) { 94 int C[4 * 4], *tmp; 95 int i; 96 tmp = C; 97 for (i = 0; i < 4; ++i) { // vertical pass 98 const int a = in[0] + in[8]; 99 const int b = in[0] - in[8]; 100 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); 101 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); 102 tmp[0] = a + d; 103 tmp[1] = b + c; 104 tmp[2] = b - c; 105 tmp[3] = a - d; 106 tmp += 4; 107 in++; 108 } 109 110 tmp = C; 111 for (i = 0; i < 4; ++i) { // horizontal pass 112 const int dc = tmp[0] + 4; 113 const int a = dc + tmp[8]; 114 const int b = dc - tmp[8]; 115 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); 116 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); 117 STORE(0, i, a + d); 118 STORE(1, i, b + c); 119 STORE(2, i, b - c); 120 STORE(3, i, a - d); 121 tmp++; 122 } 123} 124 125static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, 126 int do_two) { 127 ITransformOne(ref, in, dst); 128 
if (do_two) { 129 ITransformOne(ref + 4, in + 16, dst + 4); 130 } 131} 132 133static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { 134 int i; 135 int tmp[16]; 136 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { 137 const int d0 = src[0] - ref[0]; 138 const int d1 = src[1] - ref[1]; 139 const int d2 = src[2] - ref[2]; 140 const int d3 = src[3] - ref[3]; 141 const int a0 = (d0 + d3) << 3; 142 const int a1 = (d1 + d2) << 3; 143 const int a2 = (d1 - d2) << 3; 144 const int a3 = (d0 - d3) << 3; 145 tmp[0 + i * 4] = (a0 + a1); 146 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12; 147 tmp[2 + i * 4] = (a0 - a1); 148 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12; 149 } 150 for (i = 0; i < 4; ++i) { 151 const int a0 = (tmp[0 + i] + tmp[12 + i]); 152 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); 153 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); 154 const int a3 = (tmp[0 + i] - tmp[12 + i]); 155 out[0 + i] = (a0 + a1 + 7) >> 4; 156 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); 157 out[8 + i] = (a0 - a1 + 7) >> 4; 158 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); 159 } 160} 161 162static void ITransformWHT(const int16_t* in, int16_t* out) { 163 int tmp[16]; 164 int i; 165 for (i = 0; i < 4; ++i) { 166 const int a0 = in[0 + i] + in[12 + i]; 167 const int a1 = in[4 + i] + in[ 8 + i]; 168 const int a2 = in[4 + i] - in[ 8 + i]; 169 const int a3 = in[0 + i] - in[12 + i]; 170 tmp[0 + i] = a0 + a1; 171 tmp[8 + i] = a0 - a1; 172 tmp[4 + i] = a3 + a2; 173 tmp[12 + i] = a3 - a2; 174 } 175 for (i = 0; i < 4; ++i) { 176 const int dc = tmp[0 + i * 4] + 3; // w/ rounder 177 const int a0 = dc + tmp[3 + i * 4]; 178 const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; 179 const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; 180 const int a3 = dc - tmp[3 + i * 4]; 181 out[ 0] = (a0 + a1) >> 3; 182 out[16] = (a3 + a2) >> 3; 183 out[32] = (a0 - a1) >> 3; 184 out[48] = (a3 - a2) >> 3; 185 out += 64; 186 } 187} 188 189static void 
FTransformWHT(const int16_t* in, int16_t* out) { 190 int tmp[16]; 191 int i; 192 for (i = 0; i < 4; ++i, in += 64) { 193 const int a0 = (in[0 * 16] + in[2 * 16]) << 2; 194 const int a1 = (in[1 * 16] + in[3 * 16]) << 2; 195 const int a2 = (in[1 * 16] - in[3 * 16]) << 2; 196 const int a3 = (in[0 * 16] - in[2 * 16]) << 2; 197 tmp[0 + i * 4] = (a0 + a1) + (a0 != 0); 198 tmp[1 + i * 4] = a3 + a2; 199 tmp[2 + i * 4] = a3 - a2; 200 tmp[3 + i * 4] = a0 - a1; 201 } 202 for (i = 0; i < 4; ++i) { 203 const int a0 = (tmp[0 + i] + tmp[8 + i]); 204 const int a1 = (tmp[4 + i] + tmp[12+ i]); 205 const int a2 = (tmp[4 + i] - tmp[12+ i]); 206 const int a3 = (tmp[0 + i] - tmp[8 + i]); 207 const int b0 = a0 + a1; 208 const int b1 = a3 + a2; 209 const int b2 = a3 - a2; 210 const int b3 = a0 - a1; 211 out[ 0 + i] = (b0 + (b0 > 0) + 3) >> 3; 212 out[ 4 + i] = (b1 + (b1 > 0) + 3) >> 3; 213 out[ 8 + i] = (b2 + (b2 > 0) + 3) >> 3; 214 out[12 + i] = (b3 + (b3 > 0) + 3) >> 3; 215 } 216} 217 218#undef MUL 219#undef STORE 220 221//------------------------------------------------------------------------------ 222// Intra predictions 223 224#define DST(x, y) dst[(x) + (y) * BPS] 225 226static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { 227 int j; 228 for (j = 0; j < size; ++j) { 229 memset(dst + j * BPS, value, size); 230 } 231} 232 233static WEBP_INLINE void VerticalPred(uint8_t* dst, 234 const uint8_t* top, int size) { 235 int j; 236 if (top) { 237 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); 238 } else { 239 Fill(dst, 127, size); 240 } 241} 242 243static WEBP_INLINE void HorizontalPred(uint8_t* dst, 244 const uint8_t* left, int size) { 245 if (left) { 246 int j; 247 for (j = 0; j < size; ++j) { 248 memset(dst + j * BPS, left[j], size); 249 } 250 } else { 251 Fill(dst, 129, size); 252 } 253} 254 255static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, 256 const uint8_t* top, int size) { 257 int y; 258 if (left) { 259 if (top) { 260 const uint8_t* 
const clip = clip1 + 255 - left[-1]; 261 for (y = 0; y < size; ++y) { 262 const uint8_t* const clip_table = clip + left[y]; 263 int x; 264 for (x = 0; x < size; ++x) { 265 dst[x] = clip_table[top[x]]; 266 } 267 dst += BPS; 268 } 269 } else { 270 HorizontalPred(dst, left, size); 271 } 272 } else { 273 // true motion without left samples (hence: with default 129 value) 274 // is equivalent to VE prediction where you just copy the top samples. 275 // Note that if top samples are not available, the default value is 276 // then 129, and not 127 as in the VerticalPred case. 277 if (top) { 278 VerticalPred(dst, top, size); 279 } else { 280 Fill(dst, 129, size); 281 } 282 } 283} 284 285static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, 286 const uint8_t* top, 287 int size, int round, int shift) { 288 int DC = 0; 289 int j; 290 if (top) { 291 for (j = 0; j < size; ++j) DC += top[j]; 292 if (left) { // top and left present 293 for (j = 0; j < size; ++j) DC += left[j]; 294 } else { // top, but no left 295 DC += DC; 296 } 297 DC = (DC + round) >> shift; 298 } else if (left) { // left but no top 299 for (j = 0; j < size; ++j) DC += left[j]; 300 DC += DC; 301 DC = (DC + round) >> shift; 302 } else { // no top, no left, nothing. 
303 DC = 0x80; 304 } 305 Fill(dst, DC, size); 306} 307 308//------------------------------------------------------------------------------ 309// Chroma 8x8 prediction (paragraph 12.2) 310 311static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, 312 const uint8_t* top) { 313 // U block 314 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 315 VerticalPred(C8VE8 + dst, top, 8); 316 HorizontalPred(C8HE8 + dst, left, 8); 317 TrueMotion(C8TM8 + dst, left, top, 8); 318 // V block 319 dst += 8; 320 if (top) top += 8; 321 if (left) left += 16; 322 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 323 VerticalPred(C8VE8 + dst, top, 8); 324 HorizontalPred(C8HE8 + dst, left, 8); 325 TrueMotion(C8TM8 + dst, left, top, 8); 326} 327 328//------------------------------------------------------------------------------ 329// luma 16x16 prediction (paragraph 12.3) 330 331static void Intra16Preds(uint8_t* dst, 332 const uint8_t* left, const uint8_t* top) { 333 DCMode(I16DC16 + dst, left, top, 16, 16, 5); 334 VerticalPred(I16VE16 + dst, top, 16); 335 HorizontalPred(I16HE16 + dst, left, 16); 336 TrueMotion(I16TM16 + dst, left, top, 16); 337} 338 339//------------------------------------------------------------------------------ 340// luma 4x4 prediction 341 342#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) 343#define AVG2(a, b) (((a) + (b) + 1) >> 1) 344 345static void VE4(uint8_t* dst, const uint8_t* top) { // vertical 346 const uint8_t vals[4] = { 347 AVG3(top[-1], top[0], top[1]), 348 AVG3(top[ 0], top[1], top[2]), 349 AVG3(top[ 1], top[2], top[3]), 350 AVG3(top[ 2], top[3], top[4]) 351 }; 352 int i; 353 for (i = 0; i < 4; ++i) { 354 memcpy(dst + i * BPS, vals, 4); 355 } 356} 357 358static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal 359 const int X = top[-1]; 360 const int I = top[-2]; 361 const int J = top[-3]; 362 const int K = top[-4]; 363 const int L = top[-5]; 364 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J); 365 *(uint32_t*)(dst + 1 * BPS) = 
0x01010101U * AVG3(I, J, K); 366 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L); 367 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L); 368} 369 370static void DC4(uint8_t* dst, const uint8_t* top) { 371 uint32_t dc = 4; 372 int i; 373 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; 374 Fill(dst, dc >> 3, 4); 375} 376 377static void RD4(uint8_t* dst, const uint8_t* top) { 378 const int X = top[-1]; 379 const int I = top[-2]; 380 const int J = top[-3]; 381 const int K = top[-4]; 382 const int L = top[-5]; 383 const int A = top[0]; 384 const int B = top[1]; 385 const int C = top[2]; 386 const int D = top[3]; 387 DST(0, 3) = AVG3(J, K, L); 388 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); 389 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); 390 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); 391 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); 392 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); 393 DST(3, 0) = AVG3(D, C, B); 394} 395 396static void LD4(uint8_t* dst, const uint8_t* top) { 397 const int A = top[0]; 398 const int B = top[1]; 399 const int C = top[2]; 400 const int D = top[3]; 401 const int E = top[4]; 402 const int F = top[5]; 403 const int G = top[6]; 404 const int H = top[7]; 405 DST(0, 0) = AVG3(A, B, C); 406 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 407 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 408 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 409 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 410 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 411 DST(3, 3) = AVG3(G, H, H); 412} 413 414static void VR4(uint8_t* dst, const uint8_t* top) { 415 const int X = top[-1]; 416 const int I = top[-2]; 417 const int J = top[-3]; 418 const int K = top[-4]; 419 const int A = top[0]; 420 const int B = top[1]; 421 const int C = top[2]; 422 const int D = top[3]; 423 DST(0, 0) = DST(1, 2) = AVG2(X, A); 424 DST(1, 0) = DST(2, 2) = AVG2(A, B); 425 DST(2, 0) = DST(3, 2) = AVG2(B, C); 426 DST(3, 0) = AVG2(C, D); 427 428 DST(0, 
3) = AVG3(K, J, I); 429 DST(0, 2) = AVG3(J, I, X); 430 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 431 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 432 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 433 DST(3, 1) = AVG3(B, C, D); 434} 435 436static void VL4(uint8_t* dst, const uint8_t* top) { 437 const int A = top[0]; 438 const int B = top[1]; 439 const int C = top[2]; 440 const int D = top[3]; 441 const int E = top[4]; 442 const int F = top[5]; 443 const int G = top[6]; 444 const int H = top[7]; 445 DST(0, 0) = AVG2(A, B); 446 DST(1, 0) = DST(0, 2) = AVG2(B, C); 447 DST(2, 0) = DST(1, 2) = AVG2(C, D); 448 DST(3, 0) = DST(2, 2) = AVG2(D, E); 449 450 DST(0, 1) = AVG3(A, B, C); 451 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 452 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 453 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 454 DST(3, 2) = AVG3(E, F, G); 455 DST(3, 3) = AVG3(F, G, H); 456} 457 458static void HU4(uint8_t* dst, const uint8_t* top) { 459 const int I = top[-2]; 460 const int J = top[-3]; 461 const int K = top[-4]; 462 const int L = top[-5]; 463 DST(0, 0) = AVG2(I, J); 464 DST(2, 0) = DST(0, 1) = AVG2(J, K); 465 DST(2, 1) = DST(0, 2) = AVG2(K, L); 466 DST(1, 0) = AVG3(I, J, K); 467 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 468 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 469 DST(3, 2) = DST(2, 2) = 470 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 471} 472 473static void HD4(uint8_t* dst, const uint8_t* top) { 474 const int X = top[-1]; 475 const int I = top[-2]; 476 const int J = top[-3]; 477 const int K = top[-4]; 478 const int L = top[-5]; 479 const int A = top[0]; 480 const int B = top[1]; 481 const int C = top[2]; 482 483 DST(0, 0) = DST(2, 1) = AVG2(I, X); 484 DST(0, 1) = DST(2, 2) = AVG2(J, I); 485 DST(0, 2) = DST(2, 3) = AVG2(K, J); 486 DST(0, 3) = AVG2(L, K); 487 488 DST(3, 0) = AVG3(A, B, C); 489 DST(2, 0) = AVG3(X, A, B); 490 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 491 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 492 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 493 DST(1, 3) = AVG3(L, K, J); 494} 
495 496static void TM4(uint8_t* dst, const uint8_t* top) { 497 int x, y; 498 const uint8_t* const clip = clip1 + 255 - top[-1]; 499 for (y = 0; y < 4; ++y) { 500 const uint8_t* const clip_table = clip + top[-2 - y]; 501 for (x = 0; x < 4; ++x) { 502 dst[x] = clip_table[top[x]]; 503 } 504 dst += BPS; 505 } 506} 507 508#undef DST 509#undef AVG3 510#undef AVG2 511 512// Left samples are top[-5 .. -2], top_left is top[-1], top are 513// located at top[0..3], and top right is top[4..7] 514static void Intra4Preds(uint8_t* dst, const uint8_t* top) { 515 DC4(I4DC4 + dst, top); 516 TM4(I4TM4 + dst, top); 517 VE4(I4VE4 + dst, top); 518 HE4(I4HE4 + dst, top); 519 RD4(I4RD4 + dst, top); 520 VR4(I4VR4 + dst, top); 521 LD4(I4LD4 + dst, top); 522 VL4(I4VL4 + dst, top); 523 HD4(I4HD4 + dst, top); 524 HU4(I4HU4 + dst, top); 525} 526 527//------------------------------------------------------------------------------ 528// Metric 529 530static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, 531 int w, int h) { 532 int count = 0; 533 int y, x; 534 for (y = 0; y < h; ++y) { 535 for (x = 0; x < w; ++x) { 536 const int diff = (int)a[x] - b[x]; 537 count += diff * diff; 538 } 539 a += BPS; 540 b += BPS; 541 } 542 return count; 543} 544 545static int SSE16x16(const uint8_t* a, const uint8_t* b) { 546 return GetSSE(a, b, 16, 16); 547} 548static int SSE16x8(const uint8_t* a, const uint8_t* b) { 549 return GetSSE(a, b, 16, 8); 550} 551static int SSE8x8(const uint8_t* a, const uint8_t* b) { 552 return GetSSE(a, b, 8, 8); 553} 554static int SSE4x4(const uint8_t* a, const uint8_t* b) { 555 return GetSSE(a, b, 4, 4); 556} 557 558//------------------------------------------------------------------------------ 559// Texture distortion 560// 561// We try to match the spectral content (weighted) between source and 562// reconstructed samples. 563 564// Hadamard transform 565// Returns the weighted sum of the absolute value of transformed coefficients. 
566static int TTransform(const uint8_t* in, const uint16_t* w) { 567 int sum = 0; 568 int tmp[16]; 569 int i; 570 // horizontal pass 571 for (i = 0; i < 4; ++i, in += BPS) { 572 const int a0 = (in[0] + in[2]) << 2; 573 const int a1 = (in[1] + in[3]) << 2; 574 const int a2 = (in[1] - in[3]) << 2; 575 const int a3 = (in[0] - in[2]) << 2; 576 tmp[0 + i * 4] = a0 + a1 + (a0 != 0); 577 tmp[1 + i * 4] = a3 + a2; 578 tmp[2 + i * 4] = a3 - a2; 579 tmp[3 + i * 4] = a0 - a1; 580 } 581 // vertical pass 582 for (i = 0; i < 4; ++i, ++w) { 583 const int a0 = (tmp[0 + i] + tmp[8 + i]); 584 const int a1 = (tmp[4 + i] + tmp[12+ i]); 585 const int a2 = (tmp[4 + i] - tmp[12+ i]); 586 const int a3 = (tmp[0 + i] - tmp[8 + i]); 587 const int b0 = a0 + a1; 588 const int b1 = a3 + a2; 589 const int b2 = a3 - a2; 590 const int b3 = a0 - a1; 591 // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3 592 sum += w[ 0] * ((abs(b0) + 3) >> 3); 593 sum += w[ 4] * ((abs(b1) + 3) >> 3); 594 sum += w[ 8] * ((abs(b2) + 3) >> 3); 595 sum += w[12] * ((abs(b3) + 3) >> 3); 596 } 597 return sum; 598} 599 600static int Disto4x4(const uint8_t* const a, const uint8_t* const b, 601 const uint16_t* const w) { 602 const int sum1 = TTransform(a, w); 603 const int sum2 = TTransform(b, w); 604 return (abs(sum2 - sum1) + 8) >> 4; 605} 606 607static int Disto16x16(const uint8_t* const a, const uint8_t* const b, 608 const uint16_t* const w) { 609 int D = 0; 610 int x, y; 611 for (y = 0; y < 16 * BPS; y += 4 * BPS) { 612 for (x = 0; x < 16; x += 4) { 613 D += Disto4x4(a + x + y, b + x + y, w); 614 } 615 } 616 return D; 617} 618 619//------------------------------------------------------------------------------ 620// Quantization 621// 622 623static const uint8_t kZigzag[16] = { 624 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 625}; 626 627// Simple quantization 628static int QuantizeBlock(int16_t in[16], int16_t out[16], 629 int n, const VP8Matrix* const mtx) { 630 int last = -1; 631 for (; n < 16; ++n) { 632 const 
int j = kZigzag[n]; 633 const int sign = (in[j] < 0); 634 int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; 635 if (coeff > 2047) coeff = 2047; 636 if (coeff > mtx->zthresh_[j]) { 637 const int Q = mtx->q_[j]; 638 const int iQ = mtx->iq_[j]; 639 const int B = mtx->bias_[j]; 640 out[n] = QUANTDIV(coeff, iQ, B); 641 if (sign) out[n] = -out[n]; 642 in[j] = out[n] * Q; 643 if (out[n]) last = n; 644 } else { 645 out[n] = 0; 646 in[j] = 0; 647 } 648 } 649 return (last >= 0); 650} 651 652//------------------------------------------------------------------------------ 653// Block copy 654 655static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { 656 int y; 657 for (y = 0; y < size; ++y) { 658 memcpy(dst, src, size); 659 src += BPS; 660 dst += BPS; 661 } 662} 663 664static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } 665 666//------------------------------------------------------------------------------ 667// Initialization 668 669// Speed-critical function pointers. We have to initialize them to the default 670// implementations within VP8EncDspInit(). 
671VP8CHisto VP8CollectHistogram; 672VP8Idct VP8ITransform; 673VP8Fdct VP8FTransform; 674VP8WHT VP8ITransformWHT; 675VP8WHT VP8FTransformWHT; 676VP8Intra4Preds VP8EncPredLuma4; 677VP8IntraPreds VP8EncPredLuma16; 678VP8IntraPreds VP8EncPredChroma8; 679VP8Metric VP8SSE16x16; 680VP8Metric VP8SSE8x8; 681VP8Metric VP8SSE16x8; 682VP8Metric VP8SSE4x4; 683VP8WMetric VP8TDisto4x4; 684VP8WMetric VP8TDisto16x16; 685VP8QuantizeBlock VP8EncQuantizeBlock; 686VP8BlockCopy VP8Copy4x4; 687 688extern void VP8EncDspInitSSE2(void); 689 690void VP8EncDspInit(void) { 691 InitTables(); 692 693 // default C implementations 694 VP8CollectHistogram = CollectHistogram; 695 VP8ITransform = ITransform; 696 VP8FTransform = FTransform; 697 VP8ITransformWHT = ITransformWHT; 698 VP8FTransformWHT = FTransformWHT; 699 VP8EncPredLuma4 = Intra4Preds; 700 VP8EncPredLuma16 = Intra16Preds; 701 VP8EncPredChroma8 = IntraChromaPreds; 702 VP8SSE16x16 = SSE16x16; 703 VP8SSE8x8 = SSE8x8; 704 VP8SSE16x8 = SSE16x8; 705 VP8SSE4x4 = SSE4x4; 706 VP8TDisto4x4 = Disto4x4; 707 VP8TDisto16x16 = Disto16x16; 708 VP8EncQuantizeBlock = QuantizeBlock; 709 VP8Copy4x4 = Copy4x4; 710 711 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 712 if (VP8GetCPUInfo) { 713#if defined(WEBP_USE_SSE2) 714 if (VP8GetCPUInfo(kSSE2)) { 715 VP8EncDspInitSSE2(); 716 } 717#endif 718 } 719} 720 721#if defined(__cplusplus) || defined(c_plusplus) 722} // extern "C" 723#endif 724