// enc.c revision 8b720228d581a84fd173b6dcb2fa295b59db489a
1// Copyright 2011 Google Inc. All Rights Reserved. 2// 3// Use of this source code is governed by a BSD-style license 4// that can be found in the COPYING file in the root of the source 5// tree. An additional intellectual property rights grant can be found 6// in the file PATENTS. All contributing project authors may 7// be found in the AUTHORS file in the root of the source tree. 8// ----------------------------------------------------------------------------- 9// 10// Speed-critical encoding functions. 11// 12// Author: Skal (pascal.massimino@gmail.com) 13 14#include <assert.h> 15#include <stdlib.h> // for abs() 16 17#include "./dsp.h" 18#include "../enc/vp8enci.h" 19 20static WEBP_INLINE uint8_t clip_8b(int v) { 21 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 22} 23 24static WEBP_INLINE int clip_max(int v, int max) { 25 return (v > max) ? max : v; 26} 27 28//------------------------------------------------------------------------------ 29// Compute susceptibility based on DCT-coeff histograms: 30// the higher, the "easier" the macroblock is to compress. 31 32const int VP8DspScan[16 + 4 + 4] = { 33 // Luma 34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, 38 39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V 41}; 42 43static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, 44 int start_block, int end_block, 45 VP8Histogram* const histo) { 46 int j; 47 for (j = start_block; j < end_block; ++j) { 48 int k; 49 int16_t out[16]; 50 51 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); 52 53 // Convert coefficients to bin. 54 for (k = 0; k < 16; ++k) { 55 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? 
56 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); 57 histo->distribution[clipped_value]++; 58 } 59 } 60} 61 62//------------------------------------------------------------------------------ 63// run-time tables (~4k) 64 65static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] 66 67// We declare this variable 'volatile' to prevent instruction reordering 68// and make sure it's set to true _last_ (so as to be thread-safe) 69static volatile int tables_ok = 0; 70 71static void InitTables(void) { 72 if (!tables_ok) { 73 int i; 74 for (i = -255; i <= 255 + 255; ++i) { 75 clip1[255 + i] = clip_8b(i); 76 } 77 tables_ok = 1; 78 } 79} 80 81 82//------------------------------------------------------------------------------ 83// Transforms (Paragraph 14.4) 84 85#define STORE(x, y, v) \ 86 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) 87 88static const int kC1 = 20091 + (1 << 16); 89static const int kC2 = 35468; 90#define MUL(a, b) (((a) * (b)) >> 16) 91 92static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, 93 uint8_t* dst) { 94 int C[4 * 4], *tmp; 95 int i; 96 tmp = C; 97 for (i = 0; i < 4; ++i) { // vertical pass 98 const int a = in[0] + in[8]; 99 const int b = in[0] - in[8]; 100 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); 101 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); 102 tmp[0] = a + d; 103 tmp[1] = b + c; 104 tmp[2] = b - c; 105 tmp[3] = a - d; 106 tmp += 4; 107 in++; 108 } 109 110 tmp = C; 111 for (i = 0; i < 4; ++i) { // horizontal pass 112 const int dc = tmp[0] + 4; 113 const int a = dc + tmp[8]; 114 const int b = dc - tmp[8]; 115 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); 116 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); 117 STORE(0, i, a + d); 118 STORE(1, i, b + c); 119 STORE(2, i, b - c); 120 STORE(3, i, a - d); 121 tmp++; 122 } 123} 124 125static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst, 126 int do_two) { 127 ITransformOne(ref, in, dst); 128 
if (do_two) { 129 ITransformOne(ref + 4, in + 16, dst + 4); 130 } 131} 132 133static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { 134 int i; 135 int tmp[16]; 136 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { 137 const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255]) 138 const int d1 = src[1] - ref[1]; 139 const int d2 = src[2] - ref[2]; 140 const int d3 = src[3] - ref[3]; 141 const int a0 = (d0 + d3); // 10b [-510,510] 142 const int a1 = (d1 + d2); 143 const int a2 = (d1 - d2); 144 const int a3 = (d0 - d3); 145 tmp[0 + i * 4] = (a0 + a1) * 8; // 14b [-8160,8160] 146 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542] 147 tmp[2 + i * 4] = (a0 - a1) * 8; 148 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9; 149 } 150 for (i = 0; i < 4; ++i) { 151 const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b 152 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); 153 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); 154 const int a3 = (tmp[0 + i] - tmp[12 + i]); 155 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b 156 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); 157 out[8 + i] = (a0 - a1 + 7) >> 4; 158 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); 159 } 160} 161 162static void ITransformWHT(const int16_t* in, int16_t* out) { 163 int tmp[16]; 164 int i; 165 for (i = 0; i < 4; ++i) { 166 const int a0 = in[0 + i] + in[12 + i]; 167 const int a1 = in[4 + i] + in[ 8 + i]; 168 const int a2 = in[4 + i] - in[ 8 + i]; 169 const int a3 = in[0 + i] - in[12 + i]; 170 tmp[0 + i] = a0 + a1; 171 tmp[8 + i] = a0 - a1; 172 tmp[4 + i] = a3 + a2; 173 tmp[12 + i] = a3 - a2; 174 } 175 for (i = 0; i < 4; ++i) { 176 const int dc = tmp[0 + i * 4] + 3; // w/ rounder 177 const int a0 = dc + tmp[3 + i * 4]; 178 const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4]; 179 const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4]; 180 const int a3 = dc - tmp[3 + i * 4]; 181 out[ 0] = (a0 + a1) >> 3; 182 out[16] = (a3 + a2) >> 3; 183 out[32] = (a0 - a1) >> 3; 184 
out[48] = (a3 - a2) >> 3; 185 out += 64; 186 } 187} 188 189static void FTransformWHT(const int16_t* in, int16_t* out) { 190 // input is 12b signed 191 int32_t tmp[16]; 192 int i; 193 for (i = 0; i < 4; ++i, in += 64) { 194 const int a0 = (in[0 * 16] + in[2 * 16]); // 13b 195 const int a1 = (in[1 * 16] + in[3 * 16]); 196 const int a2 = (in[1 * 16] - in[3 * 16]); 197 const int a3 = (in[0 * 16] - in[2 * 16]); 198 tmp[0 + i * 4] = a0 + a1; // 14b 199 tmp[1 + i * 4] = a3 + a2; 200 tmp[2 + i * 4] = a3 - a2; 201 tmp[3 + i * 4] = a0 - a1; 202 } 203 for (i = 0; i < 4; ++i) { 204 const int a0 = (tmp[0 + i] + tmp[8 + i]); // 15b 205 const int a1 = (tmp[4 + i] + tmp[12+ i]); 206 const int a2 = (tmp[4 + i] - tmp[12+ i]); 207 const int a3 = (tmp[0 + i] - tmp[8 + i]); 208 const int b0 = a0 + a1; // 16b 209 const int b1 = a3 + a2; 210 const int b2 = a3 - a2; 211 const int b3 = a0 - a1; 212 out[ 0 + i] = b0 >> 1; // 15b 213 out[ 4 + i] = b1 >> 1; 214 out[ 8 + i] = b2 >> 1; 215 out[12 + i] = b3 >> 1; 216 } 217} 218 219#undef MUL 220#undef STORE 221 222//------------------------------------------------------------------------------ 223// Intra predictions 224 225#define DST(x, y) dst[(x) + (y) * BPS] 226 227static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { 228 int j; 229 for (j = 0; j < size; ++j) { 230 memset(dst + j * BPS, value, size); 231 } 232} 233 234static WEBP_INLINE void VerticalPred(uint8_t* dst, 235 const uint8_t* top, int size) { 236 int j; 237 if (top) { 238 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); 239 } else { 240 Fill(dst, 127, size); 241 } 242} 243 244static WEBP_INLINE void HorizontalPred(uint8_t* dst, 245 const uint8_t* left, int size) { 246 if (left) { 247 int j; 248 for (j = 0; j < size; ++j) { 249 memset(dst + j * BPS, left[j], size); 250 } 251 } else { 252 Fill(dst, 129, size); 253 } 254} 255 256static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, 257 const uint8_t* top, int size) { 258 int y; 259 if (left) { 
260 if (top) { 261 const uint8_t* const clip = clip1 + 255 - left[-1]; 262 for (y = 0; y < size; ++y) { 263 const uint8_t* const clip_table = clip + left[y]; 264 int x; 265 for (x = 0; x < size; ++x) { 266 dst[x] = clip_table[top[x]]; 267 } 268 dst += BPS; 269 } 270 } else { 271 HorizontalPred(dst, left, size); 272 } 273 } else { 274 // true motion without left samples (hence: with default 129 value) 275 // is equivalent to VE prediction where you just copy the top samples. 276 // Note that if top samples are not available, the default value is 277 // then 129, and not 127 as in the VerticalPred case. 278 if (top) { 279 VerticalPred(dst, top, size); 280 } else { 281 Fill(dst, 129, size); 282 } 283 } 284} 285 286static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, 287 const uint8_t* top, 288 int size, int round, int shift) { 289 int DC = 0; 290 int j; 291 if (top) { 292 for (j = 0; j < size; ++j) DC += top[j]; 293 if (left) { // top and left present 294 for (j = 0; j < size; ++j) DC += left[j]; 295 } else { // top, but no left 296 DC += DC; 297 } 298 DC = (DC + round) >> shift; 299 } else if (left) { // left but no top 300 for (j = 0; j < size; ++j) DC += left[j]; 301 DC += DC; 302 DC = (DC + round) >> shift; 303 } else { // no top, no left, nothing. 
304 DC = 0x80; 305 } 306 Fill(dst, DC, size); 307} 308 309//------------------------------------------------------------------------------ 310// Chroma 8x8 prediction (paragraph 12.2) 311 312static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, 313 const uint8_t* top) { 314 // U block 315 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 316 VerticalPred(C8VE8 + dst, top, 8); 317 HorizontalPred(C8HE8 + dst, left, 8); 318 TrueMotion(C8TM8 + dst, left, top, 8); 319 // V block 320 dst += 8; 321 if (top) top += 8; 322 if (left) left += 16; 323 DCMode(C8DC8 + dst, left, top, 8, 8, 4); 324 VerticalPred(C8VE8 + dst, top, 8); 325 HorizontalPred(C8HE8 + dst, left, 8); 326 TrueMotion(C8TM8 + dst, left, top, 8); 327} 328 329//------------------------------------------------------------------------------ 330// luma 16x16 prediction (paragraph 12.3) 331 332static void Intra16Preds(uint8_t* dst, 333 const uint8_t* left, const uint8_t* top) { 334 DCMode(I16DC16 + dst, left, top, 16, 16, 5); 335 VerticalPred(I16VE16 + dst, top, 16); 336 HorizontalPred(I16HE16 + dst, left, 16); 337 TrueMotion(I16TM16 + dst, left, top, 16); 338} 339 340//------------------------------------------------------------------------------ 341// luma 4x4 prediction 342 343#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) 344#define AVG2(a, b) (((a) + (b) + 1) >> 1) 345 346static void VE4(uint8_t* dst, const uint8_t* top) { // vertical 347 const uint8_t vals[4] = { 348 AVG3(top[-1], top[0], top[1]), 349 AVG3(top[ 0], top[1], top[2]), 350 AVG3(top[ 1], top[2], top[3]), 351 AVG3(top[ 2], top[3], top[4]) 352 }; 353 int i; 354 for (i = 0; i < 4; ++i) { 355 memcpy(dst + i * BPS, vals, 4); 356 } 357} 358 359static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal 360 const int X = top[-1]; 361 const int I = top[-2]; 362 const int J = top[-3]; 363 const int K = top[-4]; 364 const int L = top[-5]; 365 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J); 366 *(uint32_t*)(dst + 1 * BPS) = 
0x01010101U * AVG3(I, J, K); 367 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L); 368 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L); 369} 370 371static void DC4(uint8_t* dst, const uint8_t* top) { 372 uint32_t dc = 4; 373 int i; 374 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; 375 Fill(dst, dc >> 3, 4); 376} 377 378static void RD4(uint8_t* dst, const uint8_t* top) { 379 const int X = top[-1]; 380 const int I = top[-2]; 381 const int J = top[-3]; 382 const int K = top[-4]; 383 const int L = top[-5]; 384 const int A = top[0]; 385 const int B = top[1]; 386 const int C = top[2]; 387 const int D = top[3]; 388 DST(0, 3) = AVG3(J, K, L); 389 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); 390 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); 391 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); 392 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); 393 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); 394 DST(3, 0) = AVG3(D, C, B); 395} 396 397static void LD4(uint8_t* dst, const uint8_t* top) { 398 const int A = top[0]; 399 const int B = top[1]; 400 const int C = top[2]; 401 const int D = top[3]; 402 const int E = top[4]; 403 const int F = top[5]; 404 const int G = top[6]; 405 const int H = top[7]; 406 DST(0, 0) = AVG3(A, B, C); 407 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 408 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 409 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 410 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 411 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 412 DST(3, 3) = AVG3(G, H, H); 413} 414 415static void VR4(uint8_t* dst, const uint8_t* top) { 416 const int X = top[-1]; 417 const int I = top[-2]; 418 const int J = top[-3]; 419 const int K = top[-4]; 420 const int A = top[0]; 421 const int B = top[1]; 422 const int C = top[2]; 423 const int D = top[3]; 424 DST(0, 0) = DST(1, 2) = AVG2(X, A); 425 DST(1, 0) = DST(2, 2) = AVG2(A, B); 426 DST(2, 0) = DST(3, 2) = AVG2(B, C); 427 DST(3, 0) = AVG2(C, D); 428 429 DST(0, 
3) = AVG3(K, J, I); 430 DST(0, 2) = AVG3(J, I, X); 431 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 432 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 433 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 434 DST(3, 1) = AVG3(B, C, D); 435} 436 437static void VL4(uint8_t* dst, const uint8_t* top) { 438 const int A = top[0]; 439 const int B = top[1]; 440 const int C = top[2]; 441 const int D = top[3]; 442 const int E = top[4]; 443 const int F = top[5]; 444 const int G = top[6]; 445 const int H = top[7]; 446 DST(0, 0) = AVG2(A, B); 447 DST(1, 0) = DST(0, 2) = AVG2(B, C); 448 DST(2, 0) = DST(1, 2) = AVG2(C, D); 449 DST(3, 0) = DST(2, 2) = AVG2(D, E); 450 451 DST(0, 1) = AVG3(A, B, C); 452 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 453 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 454 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 455 DST(3, 2) = AVG3(E, F, G); 456 DST(3, 3) = AVG3(F, G, H); 457} 458 459static void HU4(uint8_t* dst, const uint8_t* top) { 460 const int I = top[-2]; 461 const int J = top[-3]; 462 const int K = top[-4]; 463 const int L = top[-5]; 464 DST(0, 0) = AVG2(I, J); 465 DST(2, 0) = DST(0, 1) = AVG2(J, K); 466 DST(2, 1) = DST(0, 2) = AVG2(K, L); 467 DST(1, 0) = AVG3(I, J, K); 468 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 469 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 470 DST(3, 2) = DST(2, 2) = 471 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 472} 473 474static void HD4(uint8_t* dst, const uint8_t* top) { 475 const int X = top[-1]; 476 const int I = top[-2]; 477 const int J = top[-3]; 478 const int K = top[-4]; 479 const int L = top[-5]; 480 const int A = top[0]; 481 const int B = top[1]; 482 const int C = top[2]; 483 484 DST(0, 0) = DST(2, 1) = AVG2(I, X); 485 DST(0, 1) = DST(2, 2) = AVG2(J, I); 486 DST(0, 2) = DST(2, 3) = AVG2(K, J); 487 DST(0, 3) = AVG2(L, K); 488 489 DST(3, 0) = AVG3(A, B, C); 490 DST(2, 0) = AVG3(X, A, B); 491 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 492 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 493 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 494 DST(1, 3) = AVG3(L, K, J); 495} 
496 497static void TM4(uint8_t* dst, const uint8_t* top) { 498 int x, y; 499 const uint8_t* const clip = clip1 + 255 - top[-1]; 500 for (y = 0; y < 4; ++y) { 501 const uint8_t* const clip_table = clip + top[-2 - y]; 502 for (x = 0; x < 4; ++x) { 503 dst[x] = clip_table[top[x]]; 504 } 505 dst += BPS; 506 } 507} 508 509#undef DST 510#undef AVG3 511#undef AVG2 512 513// Left samples are top[-5 .. -2], top_left is top[-1], top are 514// located at top[0..3], and top right is top[4..7] 515static void Intra4Preds(uint8_t* dst, const uint8_t* top) { 516 DC4(I4DC4 + dst, top); 517 TM4(I4TM4 + dst, top); 518 VE4(I4VE4 + dst, top); 519 HE4(I4HE4 + dst, top); 520 RD4(I4RD4 + dst, top); 521 VR4(I4VR4 + dst, top); 522 LD4(I4LD4 + dst, top); 523 VL4(I4VL4 + dst, top); 524 HD4(I4HD4 + dst, top); 525 HU4(I4HU4 + dst, top); 526} 527 528//------------------------------------------------------------------------------ 529// Metric 530 531static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, 532 int w, int h) { 533 int count = 0; 534 int y, x; 535 for (y = 0; y < h; ++y) { 536 for (x = 0; x < w; ++x) { 537 const int diff = (int)a[x] - b[x]; 538 count += diff * diff; 539 } 540 a += BPS; 541 b += BPS; 542 } 543 return count; 544} 545 546static int SSE16x16(const uint8_t* a, const uint8_t* b) { 547 return GetSSE(a, b, 16, 16); 548} 549static int SSE16x8(const uint8_t* a, const uint8_t* b) { 550 return GetSSE(a, b, 16, 8); 551} 552static int SSE8x8(const uint8_t* a, const uint8_t* b) { 553 return GetSSE(a, b, 8, 8); 554} 555static int SSE4x4(const uint8_t* a, const uint8_t* b) { 556 return GetSSE(a, b, 4, 4); 557} 558 559//------------------------------------------------------------------------------ 560// Texture distortion 561// 562// We try to match the spectral content (weighted) between source and 563// reconstructed samples. 564 565// Hadamard transform 566// Returns the weighted sum of the absolute value of transformed coefficients. 
567static int TTransform(const uint8_t* in, const uint16_t* w) { 568 int sum = 0; 569 int tmp[16]; 570 int i; 571 // horizontal pass 572 for (i = 0; i < 4; ++i, in += BPS) { 573 const int a0 = in[0] + in[2]; 574 const int a1 = in[1] + in[3]; 575 const int a2 = in[1] - in[3]; 576 const int a3 = in[0] - in[2]; 577 tmp[0 + i * 4] = a0 + a1; 578 tmp[1 + i * 4] = a3 + a2; 579 tmp[2 + i * 4] = a3 - a2; 580 tmp[3 + i * 4] = a0 - a1; 581 } 582 // vertical pass 583 for (i = 0; i < 4; ++i, ++w) { 584 const int a0 = tmp[0 + i] + tmp[8 + i]; 585 const int a1 = tmp[4 + i] + tmp[12+ i]; 586 const int a2 = tmp[4 + i] - tmp[12+ i]; 587 const int a3 = tmp[0 + i] - tmp[8 + i]; 588 const int b0 = a0 + a1; 589 const int b1 = a3 + a2; 590 const int b2 = a3 - a2; 591 const int b3 = a0 - a1; 592 593 sum += w[ 0] * abs(b0); 594 sum += w[ 4] * abs(b1); 595 sum += w[ 8] * abs(b2); 596 sum += w[12] * abs(b3); 597 } 598 return sum; 599} 600 601static int Disto4x4(const uint8_t* const a, const uint8_t* const b, 602 const uint16_t* const w) { 603 const int sum1 = TTransform(a, w); 604 const int sum2 = TTransform(b, w); 605 return abs(sum2 - sum1) >> 5; 606} 607 608static int Disto16x16(const uint8_t* const a, const uint8_t* const b, 609 const uint16_t* const w) { 610 int D = 0; 611 int x, y; 612 for (y = 0; y < 16 * BPS; y += 4 * BPS) { 613 for (x = 0; x < 16; x += 4) { 614 D += Disto4x4(a + x + y, b + x + y, w); 615 } 616 } 617 return D; 618} 619 620//------------------------------------------------------------------------------ 621// Quantization 622// 623 624static const uint8_t kZigzag[16] = { 625 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 626}; 627 628// Simple quantization 629static int QuantizeBlock(int16_t in[16], int16_t out[16], 630 int n, const VP8Matrix* const mtx) { 631 int last = -1; 632 for (; n < 16; ++n) { 633 const int j = kZigzag[n]; 634 const int sign = (in[j] < 0); 635 const int coeff = (sign ? 
-in[j] : in[j]) + mtx->sharpen_[j]; 636 if (coeff > mtx->zthresh_[j]) { 637 const int Q = mtx->q_[j]; 638 const int iQ = mtx->iq_[j]; 639 const int B = mtx->bias_[j]; 640 out[n] = QUANTDIV(coeff, iQ, B); 641 if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL; 642 if (sign) out[n] = -out[n]; 643 in[j] = out[n] * Q; 644 if (out[n]) last = n; 645 } else { 646 out[n] = 0; 647 in[j] = 0; 648 } 649 } 650 return (last >= 0); 651} 652 653static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], 654 const VP8Matrix* const mtx) { 655 int n, last = -1; 656 for (n = 0; n < 16; ++n) { 657 const int j = kZigzag[n]; 658 const int sign = (in[j] < 0); 659 const int coeff = sign ? -in[j] : in[j]; 660 assert(mtx->sharpen_[j] == 0); 661 if (coeff > mtx->zthresh_[j]) { 662 const int Q = mtx->q_[j]; 663 const int iQ = mtx->iq_[j]; 664 const int B = mtx->bias_[j]; 665 out[n] = QUANTDIV(coeff, iQ, B); 666 if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL; 667 if (sign) out[n] = -out[n]; 668 in[j] = out[n] * Q; 669 if (out[n]) last = n; 670 } else { 671 out[n] = 0; 672 in[j] = 0; 673 } 674 } 675 return (last >= 0); 676} 677 678//------------------------------------------------------------------------------ 679// Block copy 680 681static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { 682 int y; 683 for (y = 0; y < size; ++y) { 684 memcpy(dst, src, size); 685 src += BPS; 686 dst += BPS; 687 } 688} 689 690static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } 691 692//------------------------------------------------------------------------------ 693// Initialization 694 695// Speed-critical function pointers. We have to initialize them to the default 696// implementations within VP8EncDspInit(). 
697VP8CHisto VP8CollectHistogram; 698VP8Idct VP8ITransform; 699VP8Fdct VP8FTransform; 700VP8WHT VP8ITransformWHT; 701VP8WHT VP8FTransformWHT; 702VP8Intra4Preds VP8EncPredLuma4; 703VP8IntraPreds VP8EncPredLuma16; 704VP8IntraPreds VP8EncPredChroma8; 705VP8Metric VP8SSE16x16; 706VP8Metric VP8SSE8x8; 707VP8Metric VP8SSE16x8; 708VP8Metric VP8SSE4x4; 709VP8WMetric VP8TDisto4x4; 710VP8WMetric VP8TDisto16x16; 711VP8QuantizeBlock VP8EncQuantizeBlock; 712VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; 713VP8BlockCopy VP8Copy4x4; 714 715extern void VP8EncDspInitSSE2(void); 716extern void VP8EncDspInitNEON(void); 717 718void VP8EncDspInit(void) { 719 InitTables(); 720 721 // default C implementations 722 VP8CollectHistogram = CollectHistogram; 723 VP8ITransform = ITransform; 724 VP8FTransform = FTransform; 725 VP8ITransformWHT = ITransformWHT; 726 VP8FTransformWHT = FTransformWHT; 727 VP8EncPredLuma4 = Intra4Preds; 728 VP8EncPredLuma16 = Intra16Preds; 729 VP8EncPredChroma8 = IntraChromaPreds; 730 VP8SSE16x16 = SSE16x16; 731 VP8SSE8x8 = SSE8x8; 732 VP8SSE16x8 = SSE16x8; 733 VP8SSE4x4 = SSE4x4; 734 VP8TDisto4x4 = Disto4x4; 735 VP8TDisto16x16 = Disto16x16; 736 VP8EncQuantizeBlock = QuantizeBlock; 737 VP8EncQuantizeBlockWHT = QuantizeBlockWHT; 738 VP8Copy4x4 = Copy4x4; 739 740 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 741 if (VP8GetCPUInfo) { 742#if defined(WEBP_USE_SSE2) 743 if (VP8GetCPUInfo(kSSE2)) { 744 VP8EncDspInitSSE2(); 745 } 746#elif defined(WEBP_USE_NEON) 747 if (VP8GetCPUInfo(kNEON)) { 748 VP8EncDspInitNEON(); 749 } 750#endif 751 } 752} 753 754