1a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Copyright 2011 Google Inc. All Rights Reserved.
27c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora//
30406ce1417f76f2034833414dcecc9f56253640cVikas Arora// Use of this source code is governed by a BSD-style license
40406ce1417f76f2034833414dcecc9f56253640cVikas Arora// that can be found in the COPYING file in the root of the source
50406ce1417f76f2034833414dcecc9f56253640cVikas Arora// tree. An additional intellectual property rights grant can be found
60406ce1417f76f2034833414dcecc9f56253640cVikas Arora// in the file PATENTS. All contributing project authors may
70406ce1417f76f2034833414dcecc9f56253640cVikas Arora// be found in the AUTHORS file in the root of the source tree.
87c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// -----------------------------------------------------------------------------
97c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora//
107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora//   Quantization
117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora//
127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Author: Skal (pascal.massimino@gmail.com)
137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#include <assert.h>
157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#include <math.h>
168b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#include <stdlib.h>  // for abs()
177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
18a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./vp8enci.h"
19a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./cost.h"
207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Compile-time switches.
#define DO_TRELLIS_I4   1
#define DO_TRELLIS_I16  1   // not a huge gain, but ok at low bitrate.
#define DO_TRELLIS_UV   0   // trellis is disabled for UV. Risky. Not worth.
#define USE_TDISTO      1

// Reference points for the susceptibility value.
#define MID_ALPHA  64     // neutral value
#define MIN_ALPHA  30     // lowest usable value
#define MAX_ALPHA  100    // highest meaningful value

// Scaling constant between the sns value and the QP power-law modulation.
// Must be strictly less than 1.
#define SNS_TO_DQ  0.9

// Rate-penalty for quick i4/i16 decision.
#define I4_PENALTY  4000

// Number of non-zero coeffs below which we consider the block very flat
// (and apply a penalty to complex predictions).
#define FLATNESS_LIMIT_I16  10     // I16 mode
#define FLATNESS_LIMIT_I4   3      // I4 mode
#define FLATNESS_LIMIT_UV   2      // UV mode
#define FLATNESS_PENALTY    140    // roughly ~1bit per block

// Product of 'a' and 'b' with 128 added before the right shift by 8 bits.
#define MULT_8B(a, b)  (((a) * (b) + 128) >> 8)
437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
448b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora// #define DEBUG_BLOCK
458b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
468b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora//------------------------------------------------------------------------------
478b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
488b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#if defined(DEBUG_BLOCK)
498b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
508b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#include <stdio.h>
518b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#include <stdlib.h>
528b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
538b720228d581a84fd173b6dcb2fa295b59db489aVikas Arorastatic void PrintBlockInfo(const VP8EncIterator* const it,
548b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora                           const VP8ModeScore* const rd) {
558b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  int i, j;
568b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const int is_i16 = (it->mb_->type_ == 1);
578b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  printf("SOURCE / OUTPUT / ABS DELTA\n");
588b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  for (j = 0; j < 24; ++j) {
598b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (j == 16) printf("\n");   // newline before the U/V block
608b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
618b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("     ");
628b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
638b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("     ");
648b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = 0; i < 16; ++i) {
658b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
668b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
678b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("\n");
688b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
698b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
708b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
718b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    (int)rd->score);
728b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  if (is_i16) {
738b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("Mode: %d\n", rd->mode_i16);
748b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("y_dc_levels:");
758b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
768b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("\n");
778b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  } else {
788b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("Modes[16]: ");
798b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
808b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("\n");
818b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
828b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  printf("y_ac_levels:\n");
838b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  for (j = 0; j < 16; ++j) {
848b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = is_i16 ? 1 : 0; i < 16; ++i) {
858b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      printf("%4d ", rd->y_ac_levels[j][i]);
868b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
878b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("\n");
888b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
898b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  printf("\n");
908b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  printf("uv_levels (mode=%d):\n", rd->mode_uv);
918b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  for (j = 0; j < 8; ++j) {
928b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = 0; i < 16; ++i) {
938b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      printf("%4d ", rd->uv_levels[j][i]);
948b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
958b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    printf("\n");
968b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
978b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora}
988b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
998b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#endif   // DEBUG_BLOCK
1007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
101a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
1027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
103a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE int clip(int v, int m, int M) {
1047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return v < m ? m : v > M ? M : v;
1057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
1067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// 16-entry index permutation.
// NOTE(review): presumably the zigzag scan order of a 4x4 coefficient block
// (inferred from the name only) -- confirm against the users of this table.
static const uint8_t kZigzag[16] = {
  0,  1,  4,  8,
  5,  2,  3,  6,
  9, 12, 13, 10,
  7, 11, 14, 15
};
1107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// DC quantizer values, looked up with a quantizer index in [0, 127].
// The trailing comment on each row is the index of its first entry.
static const uint8_t kDcTable[128] = {
    4,   5,   6,   7,   8,   9,  10,  10,   //   0
   11,  12,  13,  14,  15,  16,  17,  17,   //   8
   18,  19,  20,  20,  21,  21,  22,  22,   //  16
   23,  23,  24,  25,  25,  26,  27,  28,   //  24
   29,  30,  31,  32,  33,  34,  35,  36,   //  32
   37,  37,  38,  39,  40,  41,  42,  43,   //  40
   44,  45,  46,  46,  47,  48,  49,  50,   //  48
   51,  52,  53,  54,  55,  56,  57,  58,   //  56
   59,  60,  61,  62,  63,  64,  65,  66,   //  64
   67,  68,  69,  70,  71,  72,  73,  74,   //  72
   75,  76,  76,  77,  78,  79,  80,  81,   //  80
   82,  83,  84,  85,  86,  87,  88,  89,   //  88
   91,  93,  95,  96,  98, 100, 101, 102,   //  96
  104, 106, 108, 110, 112, 114, 116, 118,   // 104
  122, 124, 126, 128, 130, 132, 134, 136,   // 112
  138, 140, 143, 145, 148, 151, 154, 157    // 120
};
1297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// AC quantizer values, looked up with a quantizer index in [0, 127].
// The trailing comment on each row is the index of its first entry.
static const uint16_t kAcTable[128] = {
    4,   5,   6,   7,   8,   9,  10,  11,   //   0
   12,  13,  14,  15,  16,  17,  18,  19,   //   8
   20,  21,  22,  23,  24,  25,  26,  27,   //  16
   28,  29,  30,  31,  32,  33,  34,  35,   //  24
   36,  37,  38,  39,  40,  41,  42,  43,   //  32
   44,  45,  46,  47,  48,  49,  50,  51,   //  40
   52,  53,  54,  55,  56,  57,  58,  60,   //  48
   62,  64,  66,  68,  70,  72,  74,  76,   //  56
   78,  80,  82,  84,  86,  88,  90,  92,   //  64
   94,  96,  98, 100, 102, 104, 106, 108,   //  72
  110, 112, 114, 116, 119, 122, 125, 128,   //  80
  131, 134, 137, 140, 143, 146, 149, 152,   //  88
  155, 158, 161, 164, 167, 170, 173, 177,   //  96
  181, 185, 189, 193, 197, 201, 205, 209,   // 104
  213, 217, 221, 225, 229, 234, 239, 245,   // 112
  249, 254, 259, 264, 269, 274, 279, 284    // 120
};
1487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Second AC quantizer table, looked up with a quantizer index in [0, 127].
// In this file it provides the AC quantizer of the 'y2_' matrix.
// The trailing comment on each row is the index of its first entry.
static const uint16_t kAcTable2[128] = {
    8,   8,   9,  10,  12,  13,  15,  17,   //   0
   18,  20,  21,  23,  24,  26,  27,  29,   //   8
   31,  32,  34,  35,  37,  38,  40,  41,   //  16
   43,  44,  46,  48,  49,  51,  52,  54,   //  24
   55,  57,  58,  60,  62,  63,  65,  66,   //  32
   68,  69,  71,  72,  74,  75,  77,  79,   //  40
   80,  82,  83,  85,  86,  88,  89,  93,   //  48
   96,  99, 102, 105, 108, 111, 114, 117,   //  56
  120, 124, 127, 130, 133, 136, 139, 142,   //  64
  145, 148, 151, 155, 158, 161, 164, 167,   //  72
  170, 173, 176, 179, 184, 189, 193, 198,   //  80
  203, 207, 212, 217, 221, 226, 230, 235,   //  88
  240, 244, 249, 254, 258, 263, 268, 274,   //  96
  280, 286, 292, 299, 305, 311, 317, 323,   // 104
  330, 336, 342, 348, 354, 362, 370, 379,   // 112
  385, 393, 401, 409, 416, 424, 432, 440    // 120
};
1677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Rounding-bias values, indexed as [luma-ac,luma-dc,chroma][dc,ac].
// The first index is the matrix 'type', the second is 0 for the DC
// coefficient and 1 for the AC coefficients.
static const uint8_t kBiasMatrices[3][2] = {
  {  96, 110 },
  {  96, 108 },
  { 110, 115 }
};
1717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Sharpening by (slightly) raising the hi-frequency coeffs.
// Hack-ish but helpful for mid-bitrate range. Use with care.

// Number of descaling bits for the sharpening bias.
#define SHARPEN_BITS 11

// Per-position sharpening weights, one for each of the 16 coefficients.
// Position 0 gets no sharpening at all.
static const uint8_t kFreqSharpening[16] = {
   0, 30, 60, 90,
  30, 60, 90, 90,
  60, 90, 90, 90,
  90, 90, 90, 90
};
1817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
182a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
1837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Initialize quantization parameters in VP8Matrix
1847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Returns the average quantizer
1867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ExpandMatrix(VP8Matrix* const m, int type) {
1878b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  int i, sum;
1888b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  for (i = 0; i < 2; ++i) {
1898b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    const int is_ac_coeff = (i > 0);
1908b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    const int bias = kBiasMatrices[type][is_ac_coeff];
1918b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->iq_[i] = (1 << QFIX) / m->q_[i];
1928b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->bias_[i] = BIAS(bias);
1938b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
1948b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    //   * zero if coeff <= zthresh
1958b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    //   * non-zero if coeff > zthresh
1968b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
1978b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
1987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 2; i < 16; ++i) {
1997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->q_[i] = m->q_[1];
2008b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->iq_[i] = m->iq_[1];
2018b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->bias_[i] = m->bias_[1];
2028b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->zthresh_[i] = m->zthresh_[1];
2037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
2048b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  for (sum = 0, i = 0; i < 16; ++i) {
2058b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (type == 0) {  // we only use sharpening for AC luma coeffs
2068b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
2078b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    } else {
2088b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      m->sharpen_[i] = 0;
2098b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
2108b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    sum += m->q_[i];
2117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
2127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return (sum + 8) >> 4;
2137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
2147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SetupMatrices(VP8Encoder* enc) {
2167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int i;
2177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int tlambda_scale =
2187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    (enc->method_ >= 4) ? enc->config_->sns_strength
2197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                        : 0;
2207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int num_segments = enc->segment_hdr_.num_segments_;
2217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 0; i < num_segments; ++i) {
2227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8SegmentInfo* const m = &enc->dqm_[i];
2237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int q = m->quant_;
2247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int q4, q16, quv;
2257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
2267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y1_.q_[1] = kAcTable[clip(q,                  0, 127)];
2277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
2297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
2307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
2327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
2337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    q4  = ExpandMatrix(&m->y1_, 0);
2357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    q16 = ExpandMatrix(&m->y2_, 1);
2367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    quv = ExpandMatrix(&m->uv_, 2);
2377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2388b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->lambda_i4_          = (3 * q4 * q4) >> 7;
2398b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->lambda_i16_         = (3 * q16 * q16);
2408b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->lambda_uv_          = (3 * quv * quv) >> 6;
2418b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->lambda_mode_        = (1 * q4 * q4) >> 7;
2428b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->lambda_trellis_i4_  = (7 * q4 * q4) >> 3;
2438b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->lambda_trellis_i16_ = (q16 * q16) >> 2;
2448b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->lambda_trellis_uv_  = (quv *quv) << 1;
2458b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->tlambda_            = (tlambda_scale * q4) >> 5;
2468b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
2478b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->min_disto_ = 10 * m->y1_.q_[0];   // quantization-aware min disto
2488b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->max_edge_  = 0;
2497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
2507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
2517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
252a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
2537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Initialize filtering parameters
2547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Very small filter-strength values have close to no visual effect. So we can
2567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// save a little decoding-CPU by turning filtering off for these.
2578b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora#define FSTRENGTH_CUTOFF 2
2587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SetupFilterStrength(VP8Encoder* const enc) {
2607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int i;
2618b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
2628b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const int level0 = 5 * enc->config_->filter_strength;
2637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
2648b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    VP8SegmentInfo* const m = &enc->dqm_[i];
2658b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    // We focus on the quantization of AC coeffs.
2668b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
2678b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    const int base_strength =
2688b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora        VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
2698b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    // Segments with lower complexity ('beta') will be less filtered.
2708b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    const int f = base_strength * level0 / (256 + m->beta_);
2718b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
2727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
2737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We record the initial strength (mainly for the case of 1-segment only).
2747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
2757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
2767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
2777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
2787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
279a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
2807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Note: if you change the values below, remember that the max range
2827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// allowed by the syntax for DQ_UV is [-16,16].
2837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define MAX_DQ_UV (6)
2847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define MIN_DQ_UV (-4)
2857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// We want to emulate jpeg-like behaviour where the expected "good" quality
2877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// is around q=75. Internally, our "good" middle is around c=50. So we
2887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// map accordingly using linear piece-wise function
2891e7bf8805bd030c19924a5306837ecd72c295751Vikas Arorastatic double QualityToCompression(double c) {
2901e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
2911e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // The file size roughly scales as pow(quantizer, 3.). Actually, the
2921e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
2931e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // in the mid-quant range. So we scale the compressibility inversely to
2941e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // this power-law: quant ~= compression ^ 1/3. This law holds well for
2958b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  // low quant. Finer modeling for high-quant would make use of kAcTable[]
2961e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // more explicitly.
2971e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double v = pow(linear_c, 1 / 3.);
2981e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  return v;
2991e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora}
3001e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
3011e7bf8805bd030c19924a5306837ecd72c295751Vikas Arorastatic double QualityToJPEGCompression(double c, double alpha) {
3021e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // We map the complexity 'alpha' and quality setting 'c' to a compression
3031e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // exponent empirically matched to the compression curve of libjpeg6b.
3041e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // On average, the WebP output size will be roughly similar to that of a
3051e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // JPEG file compressed with same quality factor.
3061e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double amin = 0.30;
3071e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double amax = 0.85;
3081e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double exp_min = 0.4;
3091e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double exp_max = 0.9;
3101e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double slope = (exp_min - exp_max) / (amax - amin);
3111e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // Linearly interpolate 'expn' from exp_min to exp_max
3121e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  // in the [amin, amax] range.
3131e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double expn = (alpha > amax) ? exp_min
3141e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora                    : (alpha < amin) ? exp_max
3151e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora                    : exp_max + slope * (alpha - amin);
3161e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double v = pow(c, expn);
3171e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  return v;
3181e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora}
3191e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
3201e7bf8805bd030c19924a5306837ecd72c295751Vikas Arorastatic int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
3211e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora                                 const VP8SegmentInfo* const S2) {
3221e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
3231e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora}
3241e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
3251e7bf8805bd030c19924a5306837ecd72c295751Vikas Arorastatic void SimplifySegments(VP8Encoder* const enc) {
3261e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
3271e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const int num_segments = enc->segment_hdr_.num_segments_;
3281e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  int num_final_segments = 1;
3291e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  int s1, s2;
3301e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  for (s1 = 1; s1 < num_segments; ++s1) {    // find similar segments
3311e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
3321e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    int found = 0;
3331e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // check if we already have similar segment
3341e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    for (s2 = 0; s2 < num_final_segments; ++s2) {
3351e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
3361e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      if (SegmentsAreEquivalent(S1, S2)) {
3371e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        found = 1;
3381e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        break;
3391e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      }
3401e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    }
3411e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    map[s1] = s2;
3421e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    if (!found) {
3431e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      if (num_final_segments != s1) {
3441e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        enc->dqm_[num_final_segments] = enc->dqm_[s1];
3451e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      }
3461e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      ++num_final_segments;
3471e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    }
3481e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  }
3491e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  if (num_final_segments < num_segments) {  // Remap
3501e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    int i = enc->mb_w_ * enc->mb_h_;
3511e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
3521e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    enc->segment_hdr_.num_segments_ = num_final_segments;
3531e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // Replicate the trailing segment infos (it's mostly cosmetics)
3541e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    for (i = num_final_segments; i < num_segments; ++i) {
3551e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
3561e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    }
3571e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  }
3587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
3597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
3617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int i;
3627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int dq_uv_ac, dq_uv_dc;
3631e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const int num_segments = enc->segment_hdr_.num_segments_;
3647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
3651e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double Q = quality / 100.;
3661e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const double c_base = enc->config_->emulate_jpeg_size ?
3671e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
3681e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      QualityToCompression(Q);
3697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 0; i < num_segments; ++i) {
3701e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // We modulate the base coefficient to accommodate for the quantization
3711e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // susceptibility and allow denser segments to be quantized more.
3721e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    const double expn = 1. - amp * enc->dqm_[i].alpha_;
3737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const double c = pow(c_base, expn);
3747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int q = (int)(127. * (1. - c));
3757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    assert(expn > 0.);
3767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    enc->dqm_[i].quant_ = clip(q, 0, 127);
3777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
3787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // purely indicative in the bitstream (except for the 1-segment case)
3807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->base_quant_ = enc->dqm_[0].quant_;
3817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // fill-in values for the unused segments (required by the syntax)
3837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
3847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    enc->dqm_[i].quant_ = enc->base_quant_;
3857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
3867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // uv_alpha_ is normally spread around ~60. The useful range is
3887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
3897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
3907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
3917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                          / (MAX_ALPHA - MIN_ALPHA);
3927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // we rescale by the user-defined strength of adaptation
3937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
3947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // and make it safe.
3957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
3967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We also boost the dc-uv-quant a little, based on sns-strength, since
3977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // U/V channels are quite more reactive to high quants (flat DC-blocks
398af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  // tend to appear, and are unpleasant).
3997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_dc = -4 * enc->config_->sns_strength / 100;
4007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed
4017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_y1_dc_ = 0;       // TODO(skal): dq-lum
4037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_y2_dc_ = 0;
4047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_y2_ac_ = 0;
4057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_uv_dc_ = dq_uv_dc;
4067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_uv_ac_ = dq_uv_ac;
4077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
408b6dbce6bfeaabde2a7b581c4c6888d532d32f3acDerek Sollenberger  SetupFilterStrength(enc);   // initialize segments' filtering, eventually
4091e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
4101e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  if (num_segments > 1) SimplifySegments(enc);
4111e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
4121e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  SetupMatrices(enc);         // finalize quantization matrices
4137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
4147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
415a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
4167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Form the predictions in cache
4177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
4197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraconst int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
4207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraconst int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
4217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
4237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraconst int VP8I4ModeOffsets[NUM_BMODES] = {
4247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
4257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora};
4267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8MakeLuma16Preds(const VP8EncIterator* const it) {
4288b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
4298b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
4307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8EncPredLuma16(it->yuv_p_, left, top);
4317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
4327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8MakeChroma8Preds(const VP8EncIterator* const it) {
4348b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
4358b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
4367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8EncPredChroma8(it->yuv_p_, left, top);
4377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
4387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8MakeIntra4Preds(const VP8EncIterator* const it) {
4407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
4417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
4427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
443a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
4447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Quantize
4457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Layout:
4477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// +----+
4487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 0
4497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 4
4507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 8
4517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 12
4527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// +----+
4537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |UUVV| 16
4547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |UUVV| 20
4557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// +----+
4567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
457af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Aroraconst int VP8Scan[16] = {  // Luma
4587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
4597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
4607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
4617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
462af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora};
4637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
464af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic const int VP8ScanUV[4 + 4] = {
4657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
4667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
4677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora};
4687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
469a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
4707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Distortion measurement
4717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Per-coefficient weights (16 entries), laid out here as a 4x4 grid.
static const uint16_t kWeightY[16] = {
  38, 32, 20,  9,
  32, 28, 17,  7,
  20, 17, 10,  4,
   9,  7,  4,  2
};

// Weights multiplying each coefficient's squared-error change during the
// trellis search. The table is flat when USE_TDISTO is 0.
static const uint16_t kWeightTrellis[16] = {
#if USE_TDISTO == 0
  16, 16, 16, 16,
  16, 16, 16, 16,
  16, 16, 16, 16,
  16, 16, 16, 16
#else
  30, 27, 19, 11,
  27, 24, 17, 10,
  19, 17, 12,  8,
  11, 10,  8,  6
#endif
};
4867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Init/Copy the common fields in score.
4887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void InitScore(VP8ModeScore* const rd) {
4897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->D  = 0;
4907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->SD = 0;
4917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->R  = 0;
4928b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  rd->H  = 0;
4937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->nz = 0;
4947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->score = MAX_COST;
4957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
4967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
4987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->D  = src->D;
4997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->SD = src->SD;
5007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->R  = src->R;
5018b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  dst->H  = src->H;
5027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->nz = src->nz;      // note that nz is not accumulated, but just copied.
5037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->score = src->score;
5047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
5057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
5077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->D  += src->D;
5087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->SD += src->SD;
5097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->R  += src->R;
5108b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  dst->H  += src->H;
5117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->nz |= src->nz;     // here, new nz bits are accumulated.
5127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->score += src->score;
5137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
5147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
515a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
5167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Performs trellis-optimized quantization.
5177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// One candidate in the trellis: a level for a given coefficient, plus the
// link to the predecessor it was reached from.
typedef struct {
  int8_t prev;            // index of the best previous node
  int8_t sign;            // sign of coeff_i (non-zero when negative)
  int16_t level;          // candidate quantized level
} Node;
5247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
525af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora// Score state
526af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Aroratypedef struct {
527af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  score_t score;          // partial RD score
528af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  const uint16_t* costs;  // shortcut to cost tables
529af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora} ScoreState;
530af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora
// Search window and accessors used by TrellisQuantizeBlock().
5317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// If a coefficient was quantized to a value Q (using a neutral bias),
5327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
5337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// We don't test negative values though.
5347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define MIN_DELTA 0   // how much lower level to try
5357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define MAX_DELTA 1   // how much higher
// Number of candidate levels kept per coefficient.
5367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
// NODE(n, l) and SCORE_STATE(n, l) expand to accesses on arrays that must be
// named 'nodes' and 'score_states' at the point of use. 'l' is a signed delta
// in [-MIN_DELTA, MAX_DELTA]; adding MIN_DELTA turns it into an array index.
5377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
5387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
5397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
540a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
5417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // TODO: incorporate the "* 256" in the tables?
5428b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
5437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
5447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
545a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
546a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora                                          score_t distortion) {
5477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return rate * lambda + 256 * distortion;
5487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
5497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Trellis-optimized quantization of one block of 16 coefficients.
//
// For each scan position n in [first, last], the candidate levels level0 + m
// (m in [-MIN_DELTA, MAX_DELTA]) are scored, level0 being the neutral-bias
// quantization of |in[kZigzag[n]]| + sharpen_. Every candidate remembers its
// cheapest predecessor; the cheapest path that ends on a non-zero level is
// then unwound to produce the output.
//
//   enc        - supplies proba_.coeffs_ and proba_.level_cost_ for
//                'coeff_type'.
//   in         - input coefficients, read through kZigzag[]. Modified: zeroed
//                from index 'first' on, then set to level * q_ along the best
//                path.
//   out        - receives the signed levels, indexed by scan position n.
//                Zeroed from index 'first' on before being filled.
//   ctx0       - context used for the first coded coefficient.
//   coeff_type - selects the probability/cost tables; for type 0 the scan
//                starts at position 1 instead of 0.
//   mtx        - quantizer data (q_, iq_, sharpen_).
//   lambda     - rate multiplier passed to RDScoreTrellis().
//
// Returns 0 when no path beats the 'skip' score, otherwise (nz != 0) where nz
// ORs together the levels written along the best path.
550af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic int TrellisQuantizeBlock(const VP8Encoder* const enc,
5517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                int16_t in[16], int16_t out[16],
5527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                int ctx0, int coeff_type,
5537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                const VP8Matrix* const mtx,
5547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                int lambda) {
555af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
556af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
5577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int first = (coeff_type == 0) ? 1 : 0;
558af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  Node nodes[16][NUM_NODES];
  // Only two rows of score states are kept (current and previous position);
  // they are swapped at every step of the traversal.
559af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  ScoreState score_states[2][NUM_NODES];
  // NOTE(review): SCORE_STATE() already adds MIN_DELTA, so passing MIN_DELTA
  // here offsets the pointer by 2 * MIN_DELTA. That is harmless while
  // MIN_DELTA is 0, but ss_cur[MAX_DELTA] would index past the row if
  // MIN_DELTA were raised -- confirm before changing MIN_DELTA.
560af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
561af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
5627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
5637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  score_t best_score;
564af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  int n, m, p, last;
5657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  {
5677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    score_t cost;
    // Squared-magnitude threshold, (q_[1])^2 / 4: coefficients at or below
    // it do not count when locating the last interesting position.
5687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
569af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    const int last_proba = probas[VP8EncBands[first]][ctx0][0];
5707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
571af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    // compute the position of the last interesting coefficient
    // ('last' stays at first - 1 when no coefficient exceeds the threshold)
572af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    last = first - 1;
573af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    for (n = 15; n >= first; --n) {
574af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      const int j = kZigzag[n];
5757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      const int err = in[j] * in[j];
576af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      if (err > thresh) {
577af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        last = n;
578af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        break;
579af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      }
5807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
5817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // we don't need to go inspect up to n = 16 coeffs. We can just go up
5827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // to last + 1 (inclusive) without losing much.
5837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (last < 15) ++last;
5847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // compute 'skip' score. This is the max score one can do.
    // (any path retained later must score strictly below this value)
5867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    cost = VP8BitCost(0, last_proba);
587af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    best_score = RDScoreTrellis(lambda, cost, 0);
5887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // initialize source node.
    // All candidates start from the same score and the same cost table; the
    // initial rate is only charged when ctx0 is 0.
5907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
591af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
592af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
593af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
5947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
5957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
5967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // traverse trellis.
5987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (n = first; n <= last; ++n) {
599af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    const int j = kZigzag[n];
600af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    const uint32_t Q  = mtx->q_[j];
601af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    const uint32_t iQ = mtx->iq_[j];
602af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    const uint32_t B = BIAS(0x00);     // neutral bias
6037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // note: it's important to take sign of the _original_ coeff,
6047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // so we don't have to consider level < 0 afterward.
6057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int sign = (in[j] < 0);
    // coeff0 is the absolute value of the coefficient plus its sharpen_ term.
606af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
6078b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    int level0 = QUANTDIV(coeff0, iQ, B);
6088b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
6097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
610af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    {   // Swap current and previous score states
611af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ScoreState* const tmp = ss_cur;
612af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ss_cur = ss_prev;
613af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ss_prev = tmp;
614af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    }
615af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora
6167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // test all alternate level values around level0.
6177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
6187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      Node* const cur = &NODE(n, m);
6197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      int level = level0 + m;
      // The context handed to the next coefficient saturates at 2.
      // NOTE(review): ctx is used as an index below before the dead-node
      // check; it would be negative for level < 0, which can only occur if
      // MIN_DELTA is made non-zero.
620af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      const int ctx = (level > 2) ? 2 : level;
621af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      const int band = VP8EncBands[n + 1];
622af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      score_t base_score, last_pos_score;
623af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      score_t best_cur_score = MAX_COST;
624af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      int best_prev = 0;   // default, in case
625af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora
      // Mark the candidate as dead by default; a live one gets its real
      // score once its best predecessor is known.
626af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ss_cur[m].score = MAX_COST;
627af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ss_cur[m].costs = costs[band][ctx];
6288b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      if (level > MAX_LEVEL || level < 0) {   // node is dead?
6297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        continue;
6307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
6317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
632af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      // Compute extra rate cost if last coeff's position is < 15
633af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      {
634af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        const score_t last_pos_cost =
635af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora            (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
636af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
637af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      }
638af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora
639af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      {
640af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        // Compute delta_error = how much coding this level will
641af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        // subtract to max_error as distortion.
642af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
        // NOTE(review): coeff0 and Q are uint32_t, so these expressions are
        // evaluated in unsigned arithmetic and then converted to int.
643af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        const int new_error = coeff0 - level * Q;
644af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        const int delta_error =
645af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora            kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
646af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        base_score = RDScoreTrellis(lambda, 0, delta_error);
647af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      }
6487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // Inspect all possible non-dead predecessors. Retain only the best one.
6507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
651af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
652af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        // eliminated since their score can't be better than the current best.
        // The level's cost is read from the predecessor's cost table.
653af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
6547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        // Examine node assuming it's a non-terminal one.
655af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        const score_t score =
656af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora            base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
657af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        if (score < best_cur_score) {
658af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora          best_cur_score = score;
659af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora          best_prev = p;
6607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
661af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      }
662af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      // Store best finding in current node.
663af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      cur->sign = sign;
664af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      cur->level = level;
665af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      cur->prev = best_prev;
666af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      ss_cur[m].score = best_cur_score;
667af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora
668af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      // Now, record best terminal node (and thus best entry in the graph).
      // Only a non-zero level may terminate a path.
669af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      if (level != 0) {
670af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        const score_t score = best_cur_score + last_pos_score;
671af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        if (score < best_score) {
672af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora          best_score = score;
673af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora          best_path[0] = n;                     // best eob position
674af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora          best_path[1] = m;                     // best node index
675af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora          best_path[2] = best_prev;             // best predecessor
6767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
6777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
6787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
6797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
6807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Fresh start
  // Entries before 'first' are left untouched in both arrays.
6827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  memset(in + first, 0, (16 - first) * sizeof(*in));
6837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  memset(out + first, 0, (16 - first) * sizeof(*out));
6847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (best_path[0] == -1) {
6857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    return 0;   // skip!
6867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
6877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
688af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  {
689af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    // Unwind the best path.
690af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    // Note: best-prev on terminal node is not necessarily equal to the
691af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    // best_prev for non-terminal. So we patch best_path[2] in.
692af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    int nz = 0;
693af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    int best_node = best_path[1];
694af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    n = best_path[0];
695af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
696af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora
    // Walk back from the terminal node to 'first': the signed level goes to
    // out[n] (scan order) and its dequantized value to in[kZigzag[n]].
697af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    for (; n >= first; --n) {
698af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      const Node* const node = &NODE(n, best_node);
699af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      const int j = kZigzag[n];
700af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      out[n] = node->sign ? -node->level : node->level;
701af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      nz |= node->level;
702af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      in[j] = out[n] * mtx->q_[j];
703af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      best_node = node->prev;
704af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    }
705af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    return (nz != 0);
7067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
7077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
7087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// NOTE(review): only NODE is undefined here; SCORE_STATE, MIN_DELTA,
// MAX_DELTA and NUM_NODES remain defined past this point.
7097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#undef NODE
7107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
711a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
7127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Performs: difference, transform, quantize, back-transform, add
7137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// all at once. Output is the reconstructed block in *yuv_out, and the
7147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// quantized levels in *levels.
7157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ReconstructIntra16(VP8EncIterator* const it,
7177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              VP8ModeScore* const rd,
7187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              uint8_t* const yuv_out,
7197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              int mode) {
720af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  const VP8Encoder* const enc = it->enc_;
7217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
7227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + Y_OFF;
723af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
7247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
7257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int n;
7267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int16_t tmp[16][16], dc_tmp[16];
7277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (n = 0; n < 16; ++n) {
7297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
7307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
7317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8FTransformWHT(tmp[0], dc_tmp);
7328b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
7337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (DO_TRELLIS_I16 && it->do_trellis_) {
7357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int x, y;
7367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8IteratorNzToBytes(it);
7377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (y = 0, n = 0; y < 4; ++y) {
7387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      for (x = 0; x < 4; ++x, ++n) {
7397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const int ctx = it->top_nz_[x] + it->left_nz_[y];
7407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const int non_zero =
741af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora            TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
742af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora                                 &dqm->y1_, dqm->lambda_trellis_i16_);
7437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        it->top_nz_[x] = it->left_nz_[y] = non_zero;
744af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora        rd->y_ac_levels[n][0] = 0;
7457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        nz |= non_zero << n;
7467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
7477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
7487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
7497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (n = 0; n < 16; ++n) {
750af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      // Zero-out the first coeff, so that: a) nz is correct below, and
751af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
752af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      tmp[n][0] = 0;
753af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
754af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      assert(rd->y_ac_levels[n][0] == 0);
7557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
7567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
7577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Transform back
759af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  VP8TransformWHT(dc_tmp, tmp[0]);
760466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora  for (n = 0; n < 16; n += 2) {
761466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora    VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
7627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
7637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return nz;
7657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
7667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ReconstructIntra4(VP8EncIterator* const it,
7687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             int16_t levels[16],
7697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             const uint8_t* const src,
7707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             uint8_t* const yuv_out,
7717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             int mode) {
7727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8Encoder* const enc = it->enc_;
7737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
7747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
7757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
7767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int16_t tmp[16];
7777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8FTransform(src, ref, tmp);
7797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (DO_TRELLIS_I4 && it->do_trellis_) {
7807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int x = it->i4_ & 3, y = it->i4_ >> 2;
7817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int ctx = it->top_nz_[x] + it->left_nz_[y];
782af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
7837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              dqm->lambda_trellis_i4_);
7847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
785af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
7867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
787466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora  VP8ITransform(ref, tmp, yuv_out, 0);
7887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return nz;
7897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
7907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
7927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                         uint8_t* const yuv_out, int mode) {
7937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8Encoder* const enc = it->enc_;
7947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
7957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + U_OFF;
7967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
7977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
7987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int n;
7997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int16_t tmp[8][16];
8007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (n = 0; n < 8; ++n) {
802af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
8037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
8047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (DO_TRELLIS_UV && it->do_trellis_) {
8057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int ch, x, y;
8067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (ch = 0, n = 0; ch <= 2; ch += 2) {
8077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      for (y = 0; y < 2; ++y) {
8087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        for (x = 0; x < 2; ++x, ++n) {
8097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
8107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          const int non_zero =
811af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora              TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
812af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora                                   &dqm->uv_, dqm->lambda_trellis_uv_);
8137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
8147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          nz |= non_zero << n;
8157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
8167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
8177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
8187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
8197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (n = 0; n < 8; ++n) {
820af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
8217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
8227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
8237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
824466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora  for (n = 0; n < 8; n += 2) {
825af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
8267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
8277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return (nz << 16);
8287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
8297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
830a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
// RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
// Pick the mode with the lowest RD-cost = Rate + lambda * Distortion.
8338b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
8348b720228d581a84fd173b6dcb2fa295b59db489aVikas Arorastatic void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
8358b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  // We look at the first three AC coefficients to determine what is the average
8368b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  // delta between each sub-4x4 block.
8378b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const int v0 = abs(DCs[1]);
8388b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const int v1 = abs(DCs[4]);
8398b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const int v2 = abs(DCs[5]);
8408b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  int max_v = (v0 > v1) ? v1 : v0;
8418b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  max_v = (v2 > max_v) ? v2 : max_v;
8428b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
8438b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora}
8447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Exchanges the two byte pointers stored at '*a' and '*b'.
static void SwapPtr(uint8_t** a, uint8_t** b) {
  uint8_t* const first = *a;
  uint8_t* const second = *b;
  *a = second;
  *b = first;
}
8507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SwapOut(VP8EncIterator* const it) {
8527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
8537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
8547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8558b720228d581a84fd173b6dcb2fa295b59db489aVikas Arorastatic score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
8568b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  score_t score = 0;
8578b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  while (num_blocks-- > 0) {      // TODO(skal): refine positional scoring?
8588b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    int i;
8598b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    for (i = 1; i < 16; ++i) {    // omit DC, we're only interested in AC
8608b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      score += (levels[i] != 0);
8618b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      if (score > thresh) return 0;
8628b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
8638b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    levels += 16;
8648b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
8658b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  return 1;
8668b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora}
8678b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
8687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
8698b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const int kNumBlocks = 16;
870af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
8717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int lambda = dqm->lambda_i16_;
8727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int tlambda = dqm->tlambda_;
8737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + Y_OFF;
8747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8ModeScore rd16;
8757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int mode;
8767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->mode_i16 = -1;
8781e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
8797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF;  // scratch buffer
8807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int nz;
8817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Reconstruct
8837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
8847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Measure RD-score
8867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd16.D = VP8SSE16x16(src, tmp_dst);
8877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
8887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora            : 0;
8898b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    rd16.H = VP8FixedCostsI16[mode];
8907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd16.R = VP8GetCostLuma16(it, &rd16);
8918b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (mode > 0 &&
8928b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora        IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
8938b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      // penalty to avoid flat area to be mispredicted by complex mode
8948b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      rd16.R += FLATNESS_PENALTY * kNumBlocks;
8958b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
8967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Since we always examine Intra16 first, we can overwrite *rd directly.
8987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SetRDScore(lambda, &rd16);
8997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (mode == 0 || rd16.score < rd->score) {
9007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      CopyScore(rd, &rd16);
9017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd->mode_i16 = mode;
9027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd->nz = nz;
9037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
9047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
9057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      SwapOut(it);
9067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
9077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
9087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
9097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetIntra16Mode(it, rd->mode_i16);
9108b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora
9118b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  // we have a blocky macroblock (only DCs are non-zero) with fairly high
9128b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  // distortion, record max delta so we can later adjust the minimal filtering
9138b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  // strength needed to smooth these blocks out.
9148b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
9158b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    StoreMaxDelta(dqm, rd->y_dc_levels);
9168b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  }
9177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
9187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
919a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
9207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// return the cost array corresponding to the surrounding prediction modes.
9227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic const uint16_t* GetCostModeI4(VP8EncIterator* const it,
923a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora                                     const uint8_t modes[16]) {
9247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int preds_w = it->enc_->preds_w_;
9257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int x = (it->i4_ & 3), y = it->i4_ >> 2;
9267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
9277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
9287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return VP8FixedCostsI4[top][left];
9297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
9307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
932a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const VP8Encoder* const enc = it->enc_;
9337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
9347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int lambda = dqm->lambda_i4_;
9357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int tlambda = dqm->tlambda_;
9367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
9377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
938a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  int total_header_bits = 0;
9397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8ModeScore rd_best;
9407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
941a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  if (enc->max_i4_header_bits_ == 0) {
942a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    return 0;
943a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  }
944a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
9457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  InitScore(&rd_best);
9468b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  rd_best.H = 211;  // '211' is the value of VP8BitCost(0, 145)
9478b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  SetRDScore(dqm->lambda_mode_, &rd_best);
9487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8IteratorStartI4(it);
9497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  do {
9508b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    const int kNumBlocks = 1;
9517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8ModeScore rd_i4;
9527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int mode;
9537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int best_mode = -1;
9547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const uint8_t* const src = src0 + VP8Scan[it->i4_];
9557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
9567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
9577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;    // scratch buffer.
9587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    InitScore(&rd_i4);
9607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8MakeIntra4Preds(it);
9617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (mode = 0; mode < NUM_BMODES; ++mode) {
9627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      VP8ModeScore rd_tmp;
9637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      int16_t tmp_levels[16];
9647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // Reconstruct
9667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.nz =
9677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
9687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // Compute RD-score
9707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
9717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.SD =
9727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
9737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                  : 0;
9748b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      rd_tmp.H = mode_costs[mode];
9757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
9768b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
9778b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora        rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
9788b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      }
9797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      SetRDScore(lambda, &rd_tmp);
9817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
9827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        CopyScore(&rd_i4, &rd_tmp);
9837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        best_mode = mode;
9847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        SwapPtr(&tmp_dst, &best_block);
9857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
9867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
9877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
9887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SetRDScore(dqm->lambda_mode_, &rd_i4);
9897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    AddScore(&rd_best, &rd_i4);
9908b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (rd_best.score >= rd->score) {
9918b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      return 0;
9928b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
9938b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    total_header_bits += (int)rd_i4.H;   // <- equal to mode_costs[best_mode];
9948b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (total_header_bits > enc->max_i4_header_bits_) {
9957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      return 0;
9967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
9977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Copy selected samples if not in the right place already.
9988b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (best_block != best_blocks + VP8Scan[it->i4_]) {
9997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
10008b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
10017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd->modes_i4[it->i4_] = best_mode;
10027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
10037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } while (VP8IteratorRotateI4(it, best_blocks));
10047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // finalize state
10067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  CopyScore(rd, &rd_best);
10077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetIntra4Mode(it, rd->modes_i4);
10087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SwapOut(it);
10097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
10107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return 1;   // select intra4x4 over intra16x16
10117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
10127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1013a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
10147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
10168b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora  const int kNumBlocks = 8;
1017af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
10187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int lambda = dqm->lambda_uv_;
10197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + U_OFF;
10207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF;  // scratch buffer
10217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  uint8_t* const dst0 = it->yuv_out_ + U_OFF;
10227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8ModeScore rd_best;
10237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int mode;
10247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->mode_uv = -1;
10267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  InitScore(&rd_best);
10271e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
10287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8ModeScore rd_uv;
10297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Reconstruct
10317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
10327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Compute RD-score
10347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.D  = VP8SSE16x8(src, tmp_dst);
10357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.SD = 0;    // TODO: should we call TDisto? it tends to flatten areas.
10368b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    rd_uv.H  = VP8FixedCostsUV[mode];
10377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.R  = VP8GetCostUV(it, &rd_uv);
10388b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
10398b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora      rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
10408b720228d581a84fd173b6dcb2fa295b59db489aVikas Arora    }
10417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SetRDScore(lambda, &rd_uv);
10437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (mode == 0 || rd_uv.score < rd_best.score) {
10447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      CopyScore(&rd_best, &rd_uv);
10457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd->mode_uv = mode;
10467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
10477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(dst0, tmp_dst, UV_SIZE);   //  TODO: SwapUVOut() ?
10487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
10497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
10507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetIntraUVMode(it, rd->mode_uv);
10517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  AddScore(rd, &rd_best);
10527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
10537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1054a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
10557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Final reconstruction and quantization.
10567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
10587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8Encoder* const enc = it->enc_;
10591e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const int is_i16 = (it->mb_->type_ == 1);
10607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
10617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10621e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  if (is_i16) {
10637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
10647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
10657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8IteratorStartI4(it);
10667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    do {
10677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      const int mode =
10687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
10697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
10707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
10717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      VP8MakeIntra4Preds(it);
10727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
10737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              src, dst, mode) << it->i4_;
10747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
10757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
10767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
10787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->nz = nz;
10797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
10807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
10811e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora// Refine intra16/intra4 sub-modes based on distortion only (not rate).
10821e7bf8805bd030c19924a5306837ecd72c295751Vikas Arorastatic void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
10831e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const int is_i16 = (it->mb_->type_ == 1);
10841e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  score_t best_score = MAX_COST;
10851e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
10861e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  if (try_both_i4_i16 || is_i16) {
10871e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    int mode;
10881e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    int best_mode = -1;
10891e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
10901e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
10911e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      const uint8_t* const src = it->yuv_in_ + Y_OFF;
10921e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      const score_t score = VP8SSE16x16(src, ref);
10931e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      if (score < best_score) {
10941e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        best_mode = mode;
10951e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        best_score = score;
10961e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      }
10971e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    }
10981e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8SetIntra16Mode(it, best_mode);
10991e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  }
11001e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  if (try_both_i4_i16 || !is_i16) {
11011e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    uint8_t modes_i4[16];
11021e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // We don't evaluate the rate here, but just account for it through a
11031e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // constant penalty (i4 mode usually needs more bits compared to i16).
11041e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    score_t score_i4 = (score_t)I4_PENALTY;
11051e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
11061e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    VP8IteratorStartI4(it);
11071e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    do {
11081e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      int mode;
11091e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      int best_sub_mode = -1;
11101e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      score_t best_sub_score = MAX_COST;
11111e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
11121e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
11131e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      // TODO(skal): we don't really need the prediction pixels here,
11141e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      // but just the distortion against 'src'.
11151e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      VP8MakeIntra4Preds(it);
11161e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      for (mode = 0; mode < NUM_BMODES; ++mode) {
11171e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
11181e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        const score_t score = VP8SSE4x4(src, ref);
11191e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        if (score < best_sub_score) {
11201e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora          best_sub_mode = mode;
11211e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora          best_sub_score = score;
11221e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora        }
11231e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      }
11241e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      modes_i4[it->i4_] = best_sub_mode;
11251e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      score_i4 += best_sub_score;
11261e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      if (score_i4 >= best_score) break;
11271e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
11281e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    if (score_i4 < best_score) {
11291e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora      VP8SetIntra4Mode(it, modes_i4);
11301e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    }
11311e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  }
11321e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora}
11331e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora
//------------------------------------------------------------------------------
// Entry point
11367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
11371e7bf8805bd030c19924a5306837ecd72c295751Vikas Aroraint VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
11381e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora                VP8RDLevel rd_opt) {
11397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int is_skipped;
11401e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const int method = it->enc_->method_;
11417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
11427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  InitScore(rd);
11437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
11447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We can perform predictions for Luma16x16 and Chroma8x8 already.
11457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Luma4x4 predictions needs to be done as-we-go.
11467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8MakeLuma16Preds(it);
11477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8MakeChroma8Preds(it);
11487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
11491e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  if (rd_opt > RD_OPT_NONE) {
11501e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
11517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    PickBestIntra16(it, rd);
11521e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    if (method >= 2) {
11537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      PickBestIntra4(it, rd);
11547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
11557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    PickBestUV(it, rd);
11561e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    if (rd_opt == RD_OPT_TRELLIS) {   // finish off with trellis-optim now
11577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      it->do_trellis_ = 1;
11587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      SimpleQuantize(it, rd);
11597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
11607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
11611e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
11621e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
11631e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    DistoRefine(it, (method >= 2));
11647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SimpleQuantize(it, rd);
11657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
11667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  is_skipped = (rd->nz == 0);
11677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetSkip(it, is_skipped);
11687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return is_skipped;
11697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
11707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1171