// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical encoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
#include <assert.h>
#include <stdlib.h>  // for abs()
#include <string.h>  // for memset() and memcpy()

#include "./dsp.h"
#include "../enc/vp8enci.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static WEBP_INLINE uint8_t clip_8b(int v) {
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static WEBP_INLINE int clip_max(int v, int max) {
252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return (v > max) ? max : v;
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const int VP8DspScan[16 + 4 + 4] = {
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Luma
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                             int start_block, int end_block,
452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                             VP8Histogram* const histo) {
462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int j;
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (j = start_block; j < end_block; ++j) {
482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    int k;
492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    int16_t out[16];
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Convert coefficients to bin.
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (k = 0; k < 16; ++k) {
552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      const int v = abs(out[k]) >> 3;  // TODO(skal): add rounding?
562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      histo->distribution[clipped_value]++;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// run-time tables (~4k)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Lookup table: clip1[255 + i] holds 'i' clamped to [0,255], for any i in
// [-255,510].
static uint8_t clip1[255 + 510 + 1];

// Becomes 1 once clip1[] is completely filled. It is declared 'volatile' with
// the intent of preventing instruction reordering, so that the flag is set
// _last_, after the table.
// NOTE(review): 'volatile' by itself is not a thread-synchronization
// primitive; confirm whether concurrent first calls can actually happen.
static volatile int tables_ok = 0;

// Fills clip1[] on the first call; subsequent calls return immediately.
static void InitTables(void) {
  int idx;
  if (tables_ok) return;
  for (idx = 0; idx <= 255 + 510; ++idx) {
    clip1[idx] = clip_8b(idx - 255);
  }
  tables_ok = 1;
}
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define STORE(x, y, v) \
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kC1 = 20091 + (1 << 16);
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int kC2 = 35468;
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define MUL(a, b) (((a) * (b)) >> 16)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      uint8_t* dst) {
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int C[4 * 4], *tmp;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int i;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  tmp = C;
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (i = 0; i < 4; ++i) {    // vertical pass
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int a = in[0] + in[8];
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int b = in[0] - in[8];
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp[0] = a + d;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp[1] = b + c;
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp[2] = b - c;
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp[3] = a - d;
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp += 4;
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    in++;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  tmp = C;
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (i = 0; i < 4; ++i) {    // horizontal pass
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int dc = tmp[0] + 4;
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int a =  dc +  tmp[8];
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int b =  dc -  tmp[8];
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    STORE(0, i, a + d);
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    STORE(1, i, b + c);
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    STORE(2, i, b - c);
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    STORE(3, i, a - d);
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp++;
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Inverse-transforms one 4x4 block, or two horizontally adjacent ones when
// 'do_two' is non-zero. The second block uses the next 16 coefficients of
// 'in' and sits 4 samples to the right in both 'ref' and 'dst'.
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                       int do_two) {
  const int num_blocks = do_two ? 2 : 1;
  int n;
  for (n = 0; n < num_blocks; ++n) {
    ITransformOne(ref + 4 * n, in + 16 * n, dst + 4 * n);
  }
}
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int i;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int tmp[16];
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int d0 = src[0] - ref[0];   // 9bit dynamic range ([-255,255])
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int d1 = src[1] - ref[1];
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int d2 = src[2] - ref[2];
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int d3 = src[3] - ref[3];
1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a0 = (d0 + d3);         // 10b                      [-510,510]
1422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a1 = (d1 + d2);
1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a2 = (d1 - d2);
1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a3 = (d0 - d3);
145eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch    tmp[0 + i * 4] = (a0 + a1) * 8;   // 14b                      [-8160,8160]
1462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;      // [-7536,7542]
147eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch    tmp[2 + i * 4] = (a0 - a1) * 8;
1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (i = 0; i < 4; ++i) {
1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a0 = (tmp[0 + i] + tmp[12 + i]);  // 15b
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int a3 = (tmp[0 + i] - tmp[12 + i]);
1552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    out[0 + i] = (a0 + a1 + 7) >> 4;            // 12b
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    out[8 + i] = (a0 - a1 + 7) >> 4;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Forward Walsh-Hadamard transform.
// The 16 input values are read from in[0], in[16], in[32], ... in[15 * 16]
// (one value every 16 entries); the 16 transformed values are written
// contiguously to 'out'.
static void FTransformWHT(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int32_t rows[16];   // output of the first pass, four values per input row
  int n;

  // First pass: each group of four inputs (64 entries apart) gives one row.
  for (n = 0; n < 4; ++n) {
    const int16_t* const src = in + n * 64;
    int32_t* const row = rows + 4 * n;
    const int sum02 = (src[0 * 16] + src[2 * 16]);  // 13b
    const int sum13 = (src[1 * 16] + src[3 * 16]);
    const int dif13 = (src[1 * 16] - src[3 * 16]);
    const int dif02 = (src[0 * 16] - src[2 * 16]);
    row[0] = sum02 + sum13;   // 14b
    row[1] = dif02 + dif13;
    row[2] = dif02 - dif13;
    row[3] = sum02 - sum13;
  }

  // Second pass: combine the rows column by column, then halve.
  for (n = 0; n < 4; ++n) {
    const int32_t* const col = rows + n;
    const int sum08 = (col[0] + col[ 8]);  // 15b
    const int sum4c = (col[4] + col[12]);
    const int dif4c = (col[4] - col[12]);
    const int dif08 = (col[0] - col[ 8]);
    out[ 0 + n] = (sum08 + sum4c) >> 1;    // 16b before the shift, 15b after
    out[ 4 + n] = (dif08 + dif4c) >> 1;
    out[ 8 + n] = (dif08 - dif4c) >> 1;
    out[12 + n] = (sum08 - sum4c) >> 1;
  }
}
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef MUL
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef STORE
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Intra predictions
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DST(x, y) dst[(x) + (y) * BPS]
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int j;
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (j = 0; j < size; ++j) {
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memset(dst + j * BPS, value, size);
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void VerticalPred(uint8_t* dst,
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     const uint8_t* top, int size) {
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int j;
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (top) {
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Fill(dst, 127, size);
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void HorizontalPred(uint8_t* dst,
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       const uint8_t* left, int size) {
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (left) {
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int j;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (j = 0; j < size; ++j) {
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      memset(dst + j * BPS, left[j], size);
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Fill(dst, 129, size);
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   const uint8_t* top, int size) {
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int y;
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (left) {
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (top) {
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const uint8_t* const clip = clip1 + 255 - left[-1];
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (y = 0; y < size; ++y) {
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const uint8_t* const clip_table = clip + left[y];
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int x;
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (x = 0; x < size; ++x) {
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          dst[x] = clip_table[top[x]];
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        dst += BPS;
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      HorizontalPred(dst, left, size);
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // true motion without left samples (hence: with default 129 value)
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // is equivalent to VE prediction where you just copy the top samples.
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Note that if top samples are not available, the default value is
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // then 129, and not 127 as in the VerticalPred case.
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (top) {
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VerticalPred(dst, top, size);
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Fill(dst, 129, size);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               const uint8_t* top,
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               int size, int round, int shift) {
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int DC = 0;
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int j;
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (top) {
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (j = 0; j < size; ++j) DC += top[j];
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (left) {   // top and left present
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (j = 0; j < size; ++j) DC += left[j];
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {      // top, but no left
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      DC += DC;
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DC = (DC + round) >> shift;
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (left) {   // left but no top
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (j = 0; j < size; ++j) DC += left[j];
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DC += DC;
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DC = (DC + round) >> shift;
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {   // no top, no left, nothing.
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DC = 0x80;
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Fill(dst, DC, size);
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             const uint8_t* top) {
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // U block
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VerticalPred(C8VE8 + dst, top, 8);
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HorizontalPred(C8HE8 + dst, left, 8);
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TrueMotion(C8TM8 + dst, left, top, 8);
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // V block
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  dst += 8;
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (top) top += 8;
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (left) left += 16;
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VerticalPred(C8VE8 + dst, top, 8);
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HorizontalPred(C8HE8 + dst, left, 8);
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TrueMotion(C8TM8 + dst, left, top, 8);
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void Intra16Preds(uint8_t* dst,
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         const uint8_t* left, const uint8_t* top) {
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCMode(I16DC16 + dst, left, top, 16, 16, 5);
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VerticalPred(I16VE16 + dst, top, 16);
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HorizontalPred(I16HE16 + dst, left, 16);
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TrueMotion(I16TM16 + dst, left, top, 16);
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// luma 4x4 prediction
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define AVG2(a, b) (((a) + (b) + 1) >> 1)
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void VE4(uint8_t* dst, const uint8_t* top) {    // vertical
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint8_t vals[4] = {
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AVG3(top[-1], top[0], top[1]),
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AVG3(top[ 0], top[1], top[2]),
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AVG3(top[ 1], top[2], top[3]),
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AVG3(top[ 2], top[3], top[4])
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int i;
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (i = 0; i < 4; ++i) {
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memcpy(dst + i * BPS, vals, 4);
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void HE4(uint8_t* dst, const uint8_t* top) {    // horizontal
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int X = top[-1];
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int I = top[-2];
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int J = top[-3];
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int K = top[-4];
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int L = top[-5];
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J);
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K);
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L);
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L);
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// DC prediction: computes the rounded mean of the four top samples
// (top[0..3]) and the four left samples (top[-5..-2]) and hands it to Fill()
// together with the block size 4.
static void DC4(uint8_t* dst, const uint8_t* top) {
  const uint8_t* const left = top - 5;
  uint32_t sum = 4;   // rounding term for the '>> 3' below
  int k = 0;
  while (k < 4) {
    sum += top[k] + left[k];
    ++k;
  }
  Fill(dst, sum >> 3, 4);
}
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void RD4(uint8_t* dst, const uint8_t* top) {
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int X = top[-1];
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int I = top[-2];
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int J = top[-3];
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int K = top[-4];
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int L = top[-5];
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int A = top[0];
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int B = top[1];
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int C = top[2];
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int D = top[3];
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 3)                                     = AVG3(J, K, L);
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 2) = DST(1, 3)                         = AVG3(I, J, K);
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 1) = DST(1, 2) = DST(2, 3)             = AVG3(X, I, J);
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 0) = DST(2, 1) = DST(3, 2)             = AVG3(B, A, X);
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 0) = DST(3, 1)                         = AVG3(C, B, A);
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 0)                                     = AVG3(D, C, B);
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void LD4(uint8_t* dst, const uint8_t* top) {
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int A = top[0];
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int B = top[1];
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int C = top[2];
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int D = top[3];
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int E = top[4];
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int F = top[5];
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int G = top[6];
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int H = top[7];
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 0)                                     = AVG3(A, B, C);
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 1) = DST(2, 2) = DST(1, 3)             = AVG3(E, F, G);
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 2) = DST(2, 3)                         = AVG3(F, G, H);
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 3)                                     = AVG3(G, H, H);
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void VR4(uint8_t* dst, const uint8_t* top) {
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int X = top[-1];
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int I = top[-2];
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int J = top[-3];
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int K = top[-4];
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int A = top[0];
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int B = top[1];
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int C = top[2];
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int D = top[3];
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 0) = DST(1, 2) = AVG2(X, A);
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 0) = DST(2, 2) = AVG2(A, B);
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 0) = DST(3, 2) = AVG2(B, C);
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 0)             = AVG2(C, D);
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 3) =             AVG3(K, J, I);
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 2) =             AVG3(J, I, X);
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 1) =             AVG3(B, C, D);
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void VL4(uint8_t* dst, const uint8_t* top) {
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int A = top[0];
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int B = top[1];
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int C = top[2];
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int D = top[3];
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int E = top[4];
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int F = top[5];
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int G = top[6];
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int H = top[7];
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 0) =             AVG2(A, B);
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 0) = DST(0, 2) = AVG2(B, C);
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 0) = DST(1, 2) = AVG2(C, D);
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 0) = DST(2, 2) = AVG2(D, E);
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 1) =             AVG3(A, B, C);
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              DST(3, 2) = AVG3(E, F, G);
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              DST(3, 3) = AVG3(F, G, H);
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void HU4(uint8_t* dst, const uint8_t* top) {
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int I = top[-2];
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int J = top[-3];
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int K = top[-4];
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int L = top[-5];
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 0) =             AVG2(I, J);
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 0) = DST(0, 1) = AVG2(J, K);
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 1) = DST(0, 2) = AVG2(K, L);
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 0) =             AVG3(I, J, K);
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 2) = DST(2, 2) =
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void HD4(uint8_t* dst, const uint8_t* top) {
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int X = top[-1];
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int I = top[-2];
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int J = top[-3];
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int K = top[-4];
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int L = top[-5];
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int A = top[0];
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int B = top[1];
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int C = top[2];
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 0) = DST(2, 1) = AVG2(I, X);
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 1) = DST(2, 2) = AVG2(J, I);
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 2) = DST(2, 3) = AVG2(K, J);
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(0, 3)             = AVG2(L, K);
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(3, 0)             = AVG3(A, B, C);
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(2, 0)             = AVG3(X, A, B);
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DST(1, 3)             = AVG3(L, K, J);
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void TM4(uint8_t* dst, const uint8_t* top) {
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int x, y;
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint8_t* const clip = clip1 + 255 - top[-1];
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (y = 0; y < 4; ++y) {
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const uint8_t* const clip_table = clip + top[-2 - y];
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (x = 0; x < 4; ++x) {
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dst[x] = clip_table[top[x]];
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    dst += BPS;
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef DST
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef AVG3
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#undef AVG2
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Left samples are top[-5 .. -2], top_left is top[-1], top are
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// located at top[0..3], and top right is top[4..7]
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DC4(I4DC4 + dst, top);
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TM4(I4TM4 + dst, top);
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VE4(I4VE4 + dst, top);
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HE4(I4HE4 + dst, top);
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RD4(I4RD4 + dst, top);
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VR4(I4VR4 + dst, top);
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  LD4(I4LD4 + dst, top);
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VL4(I4VL4 + dst, top);
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HD4(I4HD4 + dst, top);
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HU4(I4HU4 + dst, top);
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Metric
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              int w, int h) {
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int count = 0;
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int y, x;
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (y = 0; y < h; ++y) {
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (x = 0; x < w; ++x) {
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const int diff = (int)a[x] - b[x];
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      count += diff * diff;
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    a += BPS;
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    b += BPS;
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return count;
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Fixed-size front ends for GetSSE(); the name gives width x height.

// Sum of squared errors over a 16x16 block.
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
  const int sse = GetSSE(a, b, 16, 16);
  return sse;
}

// Sum of squared errors over a block 16 pixels wide and 8 rows high.
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
  const int sse = GetSSE(a, b, 16, 8);
  return sse;
}

// Sum of squared errors over an 8x8 block.
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
  const int sse = GetSSE(a, b, 8, 8);
  return sse;
}

// Sum of squared errors over a 4x4 block.
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
  const int sse = GetSSE(a, b, 4, 4);
  return sse;
}
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Texture distortion
//
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Hadamard transform
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns the weighted sum of the absolute value of transformed coefficients.
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int TTransform(const uint8_t* in, const uint16_t* w) {
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int sum = 0;
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int tmp[16];
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int i;
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // horizontal pass
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (i = 0; i < 4; ++i, in += BPS) {
5462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a0 = in[0] + in[2];
5472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a1 = in[1] + in[3];
5482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a2 = in[1] - in[3];
5492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a3 = in[0] - in[2];
5502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    tmp[0 + i * 4] = a0 + a1;
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp[1 + i * 4] = a3 + a2;
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp[2 + i * 4] = a3 - a2;
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    tmp[3 + i * 4] = a0 - a1;
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // vertical pass
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (i = 0; i < 4; ++i, ++w) {
5572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a0 = tmp[0 + i] + tmp[8 + i];
5582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a1 = tmp[4 + i] + tmp[12+ i];
5592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a2 = tmp[4 + i] - tmp[12+ i];
5602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int a3 = tmp[0 + i] - tmp[8 + i];
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int b0 = a0 + a1;
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int b1 = a3 + a2;
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int b2 = a3 - a2;
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int b3 = a0 - a1;
5652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    sum += w[ 0] * abs(b0);
5672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    sum += w[ 4] * abs(b1);
5682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    sum += w[ 8] * abs(b2);
5692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    sum += w[12] * abs(b3);
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return sum;
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Texture distortion of one 4x4 block: absolute difference between the
// weighted transform sums of 'a' and 'b', shifted right by 5.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
                    const uint16_t* const w) {
  const int energy_a = TTransform(a, w);
  const int energy_b = TTransform(b, w);
  const int delta = energy_b - energy_a;
  return abs(delta) >> 5;
}
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      const uint16_t* const w) {
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int D = 0;
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int x, y;
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (x = 0; x < 16; x += 4) {
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      D += Disto4x4(a + x + y, b + x + y, w);
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return D;
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Quantization
//
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Scan order used by the quantizers: kZigzag[n] is the index, inside a
// 16-coefficient block, of the coefficient visited at scan position n.
static const uint8_t kZigzag[16] = {
  0,  1,  4,  8,
  5,  2,  3,  6,
  9, 12, 13, 10,
  7, 11, 14, 15
};
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Simple quantization
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int QuantizeBlock(int16_t in[16], int16_t out[16],
6035f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                         const VP8Matrix* const mtx) {
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int last = -1;
6055f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int n;
6065f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  for (n = 0; n < 16; ++n) {
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int j = kZigzag[n];
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int sign = (in[j] < 0);
6095f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (coeff > mtx->zthresh_[j]) {
6115f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      const uint32_t Q = mtx->q_[j];
6125f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      const uint32_t iQ = mtx->iq_[j];
6135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      const uint32_t B = mtx->bias_[j];
6145f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      int level = QUANTDIV(coeff, iQ, B);
6155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (level > MAX_LEVEL) level = MAX_LEVEL;
6165f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (sign) level = -level;
6175f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      in[j] = level * Q;
6185f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      out[n] = level;
6195f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (level) last = n;
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      out[n] = 0;
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      in[j] = 0;
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (last >= 0);
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
6295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                            const VP8Matrix* const mtx) {
6305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  int n, last = -1;
6315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  for (n = 0; n < 16; ++n) {
6325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    const int j = kZigzag[n];
6335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    const int sign = (in[j] < 0);
6345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    const uint32_t coeff = sign ? -in[j] : in[j];
6355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    assert(mtx->sharpen_[j] == 0);
6365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    if (coeff > mtx->zthresh_[j]) {
6375f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      const uint32_t Q = mtx->q_[j];
6385f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      const uint32_t iQ = mtx->iq_[j];
6395f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      const uint32_t B = mtx->bias_[j];
6405f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      int level = QUANTDIV(coeff, iQ, B);
6415f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (level > MAX_LEVEL) level = MAX_LEVEL;
6425f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (sign) level = -level;
6435f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      in[j] = level * Q;
6445f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      out[n] = level;
6455f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (level) last = n;
6465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    } else {
6475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      out[n] = 0;
6485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      in[j] = 0;
6495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    }
6505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  }
6515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return (last >= 0);
6525d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
6535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
//------------------------------------------------------------------------------
// Block copy
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) {
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int y;
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (y = 0; y < size; ++y) {
6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memcpy(dst, src, size);
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    src += BPS;
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    dst += BPS;
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Copies one 4x4 block from 'src' to 'dst' using Copy().
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 4);
}
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
//------------------------------------------------------------------------------
// Initialization
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Speed-critical function pointers. We have to initialize them to the default
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implementations within VP8EncDspInit().
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CHisto VP8CollectHistogram;
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8Idct VP8ITransform;
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8Fdct VP8FTransform;
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8WHT VP8FTransformWHT;
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8Intra4Preds VP8EncPredLuma4;
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8IntraPreds VP8EncPredLuma16;
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8IntraPreds VP8EncPredChroma8;
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8Metric VP8SSE16x16;
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8Metric VP8SSE8x8;
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8Metric VP8SSE16x8;
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8Metric VP8SSE4x4;
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8WMetric VP8TDisto4x4;
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8WMetric VP8TDisto16x16;
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8QuantizeBlock VP8EncQuantizeBlock;
6875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8BlockCopy VP8Copy4x4;
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Per-architecture initializers, defined in other translation units.
// VP8EncDspInit() only calls one of them when the matching WEBP_USE_* macro
// is defined and VP8GetCPUInfo() reports the feature.
extern void VP8EncDspInitSSE2(void);
extern void VP8EncDspInitAVX2(void);
extern void VP8EncDspInitNEON(void);
extern void VP8EncDspInitMIPS32(void);
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void VP8EncDspInit(void) {
6965f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  VP8DspInit();  // common inverse transforms
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  InitTables();
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // default C implementations
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8CollectHistogram = CollectHistogram;
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8ITransform = ITransform;
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8FTransform = FTransform;
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8FTransformWHT = FTransformWHT;
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8EncPredLuma4 = Intra4Preds;
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8EncPredLuma16 = Intra16Preds;
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8EncPredChroma8 = IntraChromaPreds;
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8SSE16x16 = SSE16x16;
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8SSE8x8 = SSE8x8;
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8SSE16x8 = SSE16x8;
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8SSE4x4 = SSE4x4;
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8TDisto4x4 = Disto4x4;
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8TDisto16x16 = Disto16x16;
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8EncQuantizeBlock = QuantizeBlock;
7145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  VP8Copy4x4 = Copy4x4;
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
7185f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  if (VP8GetCPUInfo != NULL) {
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(WEBP_USE_SSE2)
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (VP8GetCPUInfo(kSSE2)) {
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      VP8EncDspInitSSE2();
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#endif
7245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#if defined(WEBP_USE_AVX2)
7255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if (VP8GetCPUInfo(kAVX2)) {
7265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      VP8EncDspInitAVX2();
7275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    }
7285f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#endif
7295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#if defined(WEBP_USE_NEON)
7302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (VP8GetCPUInfo(kNEON)) {
7312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      VP8EncDspInitNEON();
7322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
7345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#if defined(WEBP_USE_MIPS32)
7355f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if (VP8GetCPUInfo(kMIPS32)) {
7365f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      VP8EncDspInitMIPS32();
7375f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    }
7385f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#endif
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
742