quant.c revision a2415724fb3466168b2af5b08bd94ba732c0e753
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
//  Software License Agreement:  http://www.webmproject.org/license/software/
//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
//   Quantization
//
// Author: Skal (pascal.massimino@gmail.com)
117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#include <assert.h>
137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#include <math.h>
147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
15a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./vp8enci.h"
16a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./cost.h"
177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Compile-time switches selecting where trellis quantization is applied.
#define DO_TRELLIS_I4  1
#define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
#define DO_TRELLIS_UV  0   // disable trellis for UV. Risky. Not worth.
#define USE_TDISTO 1

#define MID_ALPHA 64      // neutral value for susceptibility
#define MIN_ALPHA 30      // lowest usable value for susceptibility
#define MAX_ALPHA 100     // highest meaningful value for susceptibility

#define SNS_TO_DQ 0.9     // Scaling constant between the sns value and the QP
                          // power-law modulation. Must be strictly less than 1.

// 8-bit fixed-point product: (a * b) / 256, with a +128 rounding term added
// before the shift.
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
//------------------------------------------------------------------------------

// Clamps 'v' to the closed range [m, M]: returns 'm' when v < m, 'M' when
// v > M, and 'v' itself otherwise. The lower bound is tested first.
static WEBP_INLINE int clip(int v, int m, int M) {
  if (v < m) return m;
  if (v > M) return M;
  return v;
}
417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Scan-order permutation of the 16 coefficient positions of a 4x4 block:
// kZigzag[i] is the matrix index visited at scan position i.
static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// DC quantizer step, indexed by a quantizer index in [0, 127].
// SetupMatrices() reads it for the q_[0] entry of the y1, y2 and uv matrices.
static const uint8_t kDcTable[128] = {
  4,     5,   6,   7,   8,   9,  10,  10,
  11,   12,  13,  14,  15,  16,  17,  17,
  18,   19,  20,  20,  21,  21,  22,  22,
  23,   23,  24,  25,  25,  26,  27,  28,
  29,   30,  31,  32,  33,  34,  35,  36,
  37,   37,  38,  39,  40,  41,  42,  43,
  44,   45,  46,  46,  47,  48,  49,  50,
  51,   52,  53,  54,  55,  56,  57,  58,
  59,   60,  61,  62,  63,  64,  65,  66,
  67,   68,  69,  70,  71,  72,  73,  74,
  75,   76,  76,  77,  78,  79,  80,  81,
  82,   83,  84,  85,  86,  87,  88,  89,
  91,   93,  95,  96,  98, 100, 101, 102,
  104, 106, 108, 110, 112, 114, 116, 118,
  122, 124, 126, 128, 130, 132, 134, 136,
  138, 140, 143, 145, 148, 151, 154, 157
};
647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// AC quantizer step, indexed by a quantizer index in [0, 127].
// SetupMatrices() reads it for the q_[1] entry of the y1 and uv matrices.
static const uint16_t kAcTable[128] = {
  4,     5,   6,   7,   8,   9,  10,  11,
  12,   13,  14,  15,  16,  17,  18,  19,
  20,   21,  22,  23,  24,  25,  26,  27,
  28,   29,  30,  31,  32,  33,  34,  35,
  36,   37,  38,  39,  40,  41,  42,  43,
  44,   45,  46,  47,  48,  49,  50,  51,
  52,   53,  54,  55,  56,  57,  58,  60,
  62,   64,  66,  68,  70,  72,  74,  76,
  78,   80,  82,  84,  86,  88,  90,  92,
  94,   96,  98, 100, 102, 104, 106, 108,
  110, 112, 114, 116, 119, 122, 125, 128,
  131, 134, 137, 140, 143, 146, 149, 152,
  155, 158, 161, 164, 167, 170, 173, 177,
  181, 185, 189, 193, 197, 201, 205, 209,
  213, 217, 221, 225, 229, 234, 239, 245,
  249, 254, 259, 264, 269, 274, 279, 284
};
837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// AC quantizer step for the y2 matrix, indexed by a quantizer index in
// [0, 127]. SetupMatrices() reads it for y2_.q_[1].
static const uint16_t kAcTable2[128] = {
  8,     8,   9,  10,  12,  13,  15,  17,
  18,   20,  21,  23,  24,  26,  27,  29,
  31,   32,  34,  35,  37,  38,  40,  41,
  43,   44,  46,  48,  49,  51,  52,  54,
  55,   57,  58,  60,  62,  63,  65,  66,
  68,   69,  71,  72,  74,  75,  77,  79,
  80,   82,  83,  85,  86,  88,  89,  93,
  96,   99, 102, 105, 108, 111, 114, 117,
  120, 124, 127, 130, 133, 136, 139, 142,
  145, 148, 151, 155, 158, 161, 164, 167,
  170, 173, 176, 179, 184, 189, 193, 198,
  203, 207, 212, 217, 221, 226, 230, 235,
  240, 244, 249, 254, 258, 263, 268, 274,
  280, 286, 292, 299, 305, 311, 317, 323,
  330, 336, 342, 348, 354, 362, 370, 379,
  385, 393, 401, 409, 416, 424, 432, 440
};
1027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Per-position threshold offsets for a 4x4 block.
// NOTE: currently only referenced from a commented-out term in the zthresh_
// computation of ExpandMatrix(), so the values have no effect yet.
static const uint16_t kCoeffThresh[16] = {
  0,  10, 20, 30,
  10, 20, 30, 30,
  20, 30, 30, 30,
  30, 30, 30, 30
};
1097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Rounding bias per coefficient position, one 4x4 matrix per plane type.
// ExpandMatrix() selects the row with its 'type' argument and feeds each
// entry to BIAS() and to the zthresh_ computation.
// TODO(skal): tune more. Coeff thresholding?
static const uint8_t kBiasMatrices[3][16] = {  // [3] = [luma-ac,luma-dc,chroma]
  { 96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96 },
  { 96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96 },
  { 96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96 }
};
1257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).
// Hack-ish but helpful for mid-bitrate range. Use with care.
// ExpandMatrix() turns each entry into sharpen_[j] = (entry * q_[j]) >> 11.
static const uint8_t kFreqSharpening[16] = {
  0,  30, 60, 90,
  30, 60, 90, 90,
  60, 90, 90, 90,
  90, 90, 90, 90
};
1347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
135a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
1367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Initialize quantization parameters in VP8Matrix
1377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Returns the average quantizer
1397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ExpandMatrix(VP8Matrix* const m, int type) {
1407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int i;
1417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int sum = 0;
1427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 2; i < 16; ++i) {
1437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->q_[i] = m->q_[1];
1447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
1457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 0; i < 16; ++i) {
146a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    const int j = kZigzag[i];
1477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int bias = kBiasMatrices[type][j];
1487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->iq_[j] = (1 << QFIX) / m->q_[j];
1497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->bias_[j] = BIAS(bias);
1507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // TODO(skal): tune kCoeffThresh[]
1517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8;
1527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11;
1537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    sum += m->q_[j];
1547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
1557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return (sum + 8) >> 4;
1567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
1577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SetupMatrices(VP8Encoder* enc) {
1597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int i;
1607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int tlambda_scale =
1617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    (enc->method_ >= 4) ? enc->config_->sns_strength
1627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                        : 0;
1637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int num_segments = enc->segment_hdr_.num_segments_;
1647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 0; i < num_segments; ++i) {
1657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8SegmentInfo* const m = &enc->dqm_[i];
1667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int q = m->quant_;
1677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int q4, q16, quv;
1687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
1697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y1_.q_[1] = kAcTable[clip(q,                  0, 127)];
1707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
1727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
1737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
1757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
1767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    q4  = ExpandMatrix(&m->y1_, 0);
1787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    q16 = ExpandMatrix(&m->y2_, 1);
1797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    quv = ExpandMatrix(&m->uv_, 2);
1807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // TODO: Switch to kLambda*[] tables?
1827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    {
1837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->lambda_i4_  = (3 * q4 * q4) >> 7;
1847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->lambda_i16_ = (3 * q16 * q16);
1857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->lambda_uv_  = (3 * quv * quv) >> 6;
1867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->lambda_mode_    = (1 * q4 * q4) >> 7;
1877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->lambda_trellis_i4_  = (7 * q4 * q4) >> 3;
1887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->lambda_trellis_i16_ = (q16 * q16) >> 2;
1897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->lambda_trellis_uv_  = (quv *quv) << 1;
1907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      m->tlambda_            = (tlambda_scale * q4) >> 5;
1917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
1927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
1937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
1947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
195a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
1967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Initialize filtering parameters
1977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
1987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Very small filter-strength values have close to no visual effect. So we can
1997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// save a little decoding-CPU by turning filtering off for these.
2007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#define FSTRENGTH_CUTOFF 3
2017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SetupFilterStrength(VP8Encoder* const enc) {
2037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int i;
2047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int level0 = enc->config_->filter_strength;
2057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
2067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS)
2077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int level = level0 * 256 * enc->dqm_[i].quant_ / 128;
2087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int f = level / (256 + enc->dqm_[i].beta_);
2097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
2107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
2117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We record the initial strength (mainly for the case of 1-segment only).
2127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
2137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
2147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
2157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
2167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
//------------------------------------------------------------------------------

// Note: if you change the values below, remember that the max range
// allowed by the syntax for DQ_UV is [-16,16].
#define MAX_DQ_UV (6)
#define MIN_DQ_UV (-4)
2237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// We want to emulate jpeg-like behaviour where the expected "good" quality
// is around q=75. Internally, our "good" middle is around c=50. So we
// map accordingly using a piece-wise linear function:
//   q/100 <  0.75  ->  (q/100) * 2/3
//   q/100 >= 0.75  ->  2 * (q/100) - 1
// Both pieces meet at q=75 (result 0.5); q=0 maps to 0 and q=100 to 1.
// No range check is performed on 'q'.
static double QualityToCompression(double q) {
  const double c = q / 100.;
  return (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
}
2317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
2337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int i;
2347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int dq_uv_ac, dq_uv_dc;
2357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int num_segments = enc->config_->segments;
2367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
2377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const double c_base = QualityToCompression(quality);
2387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = 0; i < num_segments; ++i) {
2397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // The file size roughly scales as pow(quantizer, 3.). Actually, the
2407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
2417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // in the mid-quant range. So we scale the compressibility inversely to
2427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // this power-law: quant ~= compression ^ 1/3. This law holds well for
2437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // low quant. Finer modelling for high-quant would make use of kAcTable[]
2447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // more explicitely.
2457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Additionally, we modulate the base exponent 1/3 to accommodate for the
2467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // quantization susceptibility and allow denser segments to be quantized
2477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // more.
2487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.;
2497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const double c = pow(c_base, expn);
2507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int q = (int)(127. * (1. - c));
2517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    assert(expn > 0.);
2527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    enc->dqm_[i].quant_ = clip(q, 0, 127);
2537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
2547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // purely indicative in the bitstream (except for the 1-segment case)
2567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->base_quant_ = enc->dqm_[0].quant_;
2577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // fill-in values for the unused segments (required by the syntax)
2597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
2607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    enc->dqm_[i].quant_ = enc->base_quant_;
2617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
2627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // uv_alpha_ is normally spread around ~60. The useful range is
2647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
2657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
2667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
2677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                          / (MAX_ALPHA - MIN_ALPHA);
2687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // we rescale by the user-defined strength of adaptation
2697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
2707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // and make it safe.
2717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
2727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We also boost the dc-uv-quant a little, based on sns-strength, since
2737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // U/V channels are quite more reactive to high quants (flat DC-blocks
2747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // tend to appear, and are displeasant).
2757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_dc = -4 * enc->config_->sns_strength / 100;
2767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed
2777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_y1_dc_ = 0;       // TODO(skal): dq-lum
2797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_y2_dc_ = 0;
2807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_y2_ac_ = 0;
2817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_uv_dc_ = dq_uv_dc;
2827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  enc->dq_uv_ac_ = dq_uv_ac;
2837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SetupMatrices(enc);
2857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SetupFilterStrength(enc);   // initialize segments' filtering, eventually
2877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
2887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
289a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
2907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Form the predictions in cache
2917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
2937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraconst int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
2947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraconst int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
2957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
2967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
2977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraconst int VP8I4ModeOffsets[NUM_BMODES] = {
2987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
2997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora};
3007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8MakeLuma16Preds(const VP8EncIterator* const it) {
302a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const VP8Encoder* const enc = it->enc_;
303a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const uint8_t* const left = it->x_ ? enc->y_left_ : NULL;
304a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
3057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8EncPredLuma16(it->yuv_p_, left, top);
3067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
3077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8MakeChroma8Preds(const VP8EncIterator* const it) {
309a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const VP8Encoder* const enc = it->enc_;
310a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const uint8_t* const left = it->x_ ? enc->u_left_ : NULL;
311a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
3127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8EncPredChroma8(it->yuv_p_, left, top);
3137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
3147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroravoid VP8MakeIntra4Preds(const VP8EncIterator* const it) {
3167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
3177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
3187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
319a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
3207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Quantize
3217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Layout:
3237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// +----+
3247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 0
3257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 4
3267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 8
3277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |YYYY| 12
3287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// +----+
3297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |UUVV| 16
3307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// |UUVV| 20
3317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// +----+
3327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraconst int VP8Scan[16 + 4 + 4] = {
3347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Luma
3357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
3367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
3377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
3387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
3397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
3417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
3427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora};
3437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
//------------------------------------------------------------------------------
// Distortion measurement

// Per-position weights for a 4x4 block; largest at position 0 and decreasing
// towards position 15. The 4x4 matrix is symmetric.
static const uint16_t kWeightY[16] = {
  38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2
};
3507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Per-position distortion weights used by the trellis code: flat (16
// everywhere) when USE_TDISTO is 0, frequency-dependent otherwise.
static const uint16_t kWeightTrellis[16] = {
#if USE_TDISTO == 0
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
#else
  30, 27, 19, 11,
  27, 24, 17, 10,
  19, 17, 12,  8,
  11, 10,  8,  6
#endif
};
3617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Init/Copy the common fields in score.
3637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void InitScore(VP8ModeScore* const rd) {
3647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->D  = 0;
3657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->SD = 0;
3667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->R  = 0;
3677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->nz = 0;
3687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->score = MAX_COST;
3697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
3707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
3727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->D  = src->D;
3737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->SD = src->SD;
3747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->R  = src->R;
3757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->nz = src->nz;      // note that nz is not accumulated, but just copied.
3767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->score = src->score;
3777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
3787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
3807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->D  += src->D;
3817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->SD += src->SD;
3827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->R  += src->R;
3837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->nz |= src->nz;     // here, new nz bits are accumulated.
3847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  dst->score += src->score;
3857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
3867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
//------------------------------------------------------------------------------
// Performs trellis-optimized quantization.

// Trellis
3917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
3927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroratypedef struct {
3937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int prev;        // best previous
3947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int level;       // level
3957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int sign;        // sign of coeff_i
3967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  score_t cost;    // bit cost
3977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  score_t error;   // distortion = sum of (|coeff_i| - level_i * Q_i)^2
3987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int ctx;         // context (only depends on 'level'. Could be spared.)
3997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora} Node;
4007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Search range of the trellis. When a coefficient quantizes to the value Q
// with a neutral bias, every alternate level in [Q - MIN_DELTA, Q + MAX_DELTA]
// is tested as well. Negative levels are never tested.
#define MIN_DELTA 0   // how many lower levels to try
#define MAX_DELTA 1   // how many higher levels to try
#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
// Node for coefficient 'n' at level offset 'l'. It expects an array named
// 'nodes' in scope. The '+ 1' makes room for a source node at n == -1.
#define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])
4087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
409a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
4107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // TODO: incorporate the "* 256" in the tables?
4117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->score = rd->R * lambda + 256 * (rd->D + rd->SD);
4127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
4137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
414a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
415a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora                                          score_t distortion) {
4167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return rate * lambda + 256 * distortion;
4177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
4187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int TrellisQuantizeBlock(const VP8EncIterator* const it,
4207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                int16_t in[16], int16_t out[16],
4217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                int ctx0, int coeff_type,
4227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                const VP8Matrix* const mtx,
4237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                int lambda) {
4247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type];
4257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type];
4267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int first = (coeff_type == 0) ? 1 : 0;
4277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  Node nodes[17][NUM_NODES];
4287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
4297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  score_t best_score;
4307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int best_node;
4317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int last = first - 1;
4327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int n, m, p, nz;
4337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  {
4357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    score_t cost;
4367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    score_t max_error;
4377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
4387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int last_proba = last_costs[VP8EncBands[first]][ctx0][0];
4397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // compute maximal distortion.
4417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    max_error = 0;
4427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (n = first; n < 16; ++n) {
443a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora      const int j  = kZigzag[n];
4447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      const int err = in[j] * in[j];
4457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      max_error += kWeightTrellis[j] * err;
4467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      if (err > thresh) last = n;
4477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
4487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // we don't need to go inspect up to n = 16 coeffs. We can just go up
4497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // to last + 1 (inclusive) without losing much.
4507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (last < 15) ++last;
4517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // compute 'skip' score. This is the max score one can do.
4537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    cost = VP8BitCost(0, last_proba);
4547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    best_score = RDScoreTrellis(lambda, cost, max_error);
4557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // initialize source node.
4577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    n = first - 1;
4587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
4597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      NODE(n, m).cost = 0;
4607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      NODE(n, m).error = max_error;
4617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      NODE(n, m).ctx = ctx0;
4627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
4637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
4647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // traverse trellis.
4667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (n = first; n <= last; ++n) {
467a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    const int j  = kZigzag[n];
4687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int Q  = mtx->q_[j];
4697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int iQ = mtx->iq_[j];
4707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int B = BIAS(0x00);     // neutral bias
4717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // note: it's important to take sign of the _original_ coeff,
4727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // so we don't have to consider level < 0 afterward.
4737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int sign = (in[j] < 0);
4747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
4757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int level0;
4767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (coeff0 > 2047) coeff0 = 2047;
4777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    level0 = QUANTDIV(coeff0, iQ, B);
4797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // test all alternate level values around level0.
4807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
4817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      Node* const cur = &NODE(n, m);
4827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      int delta_error, new_error;
4837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      score_t cur_score = MAX_COST;
4847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      int level = level0 + m;
4857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      int last_proba;
4867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      cur->sign = sign;
4887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      cur->level = level;
4897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
4907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      if (level >= 2048 || level < 0) {   // node is dead?
4917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        cur->cost = MAX_COST;
4927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        continue;
4937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
4947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];
4957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
4967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // Compute delta_error = how much coding this level will
4977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // subtract as distortion to max_error
4987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      new_error = coeff0 - level * Q;
4997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      delta_error =
5007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);
5017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // Inspect all possible non-dead predecessors. Retain only the best one.
5037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
5047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const Node* const prev = &NODE(n - 1, p);
5057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const int prev_ctx = prev->ctx;
5067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
5077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const score_t total_error = prev->error - delta_error;
5087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        score_t cost, base_cost, score;
5097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        if (prev->cost >= MAX_COST) {   // dead node?
5117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          continue;
5127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
5137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        // Base cost of both terminal/non-terminal
5157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        base_cost = prev->cost + VP8LevelCost(tcost, level);
5167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        // Examine node assuming it's a non-terminal one.
5187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        cost = base_cost;
5197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        if (level && n < 15) {
5207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          cost += VP8BitCost(1, last_proba);
5217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
5227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        score = RDScoreTrellis(lambda, cost, total_error);
5237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        if (score < cur_score) {
5247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          cur_score = score;
5257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          cur->cost  = cost;
5267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          cur->error = total_error;
5277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          cur->prev  = p;
5287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
5297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        // Now, record best terminal node (and thus best entry in the graph).
5317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        if (level) {
5327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          cost = base_cost;
5337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          if (n < 15) cost += VP8BitCost(0, last_proba);
5347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          score = RDScoreTrellis(lambda, cost, total_error);
5357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          if (score < best_score) {
5367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora            best_score = score;
5377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora            best_path[0] = n;   // best eob position
5387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora            best_path[1] = m;   // best level
5397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora            best_path[2] = p;   // best predecessor
5407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          }
5417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
5427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
5437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
5447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
5457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Fresh start
5477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  memset(in + first, 0, (16 - first) * sizeof(*in));
5487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  memset(out + first, 0, (16 - first) * sizeof(*out));
5497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (best_path[0] == -1) {
5507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    return 0;   // skip!
5517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
5527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Unwind the best path.
5547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Note: best-prev on terminal node is not necessarily equal to the
5557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // best_prev for non-terminal. So we patch best_path[2] in.
5567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  n = best_path[0];
5577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  best_node = best_path[1];
5587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
5597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  nz = 0;
5607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (; n >= first; --n) {
5627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const Node* const node = &NODE(n, best_node);
563a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    const int j = kZigzag[n];
5647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    out[n] = node->sign ? -node->level : node->level;
5657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    nz |= (node->level != 0);
5667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    in[j] = out[n] * mtx->q_[j];
5677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    best_node = node->prev;
5687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
5697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return nz;
5707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
5717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#undef NODE
5737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
//------------------------------------------------------------------------------
// Performs: difference, transform, quantize, back-transform, add
// all at once. Output is the reconstructed block in *yuv_out, and the
// quantized levels in *levels.
5787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ReconstructIntra16(VP8EncIterator* const it,
5807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              VP8ModeScore* const rd,
5817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              uint8_t* const yuv_out,
5827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              int mode) {
5837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8Encoder* const enc = it->enc_;
5847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
5857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + Y_OFF;
5867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
5877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
5887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int n;
5897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int16_t tmp[16][16], dc_tmp[16];
5907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (n = 0; n < 16; ++n) {
5927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
5937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
5947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8FTransformWHT(tmp[0], dc_tmp);
5957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24;
5967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
5977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (DO_TRELLIS_I16 && it->do_trellis_) {
5987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int x, y;
5997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8IteratorNzToBytes(it);
6007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (y = 0, n = 0; y < 4; ++y) {
6017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      for (x = 0; x < 4; ++x, ++n) {
6027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const int ctx = it->top_nz_[x] + it->left_nz_[y];
6037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        const int non_zero =
6047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora           TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,
6057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                                &dqm->y1_, dqm->lambda_trellis_i16_);
6067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        it->top_nz_[x] = it->left_nz_[y] = non_zero;
6077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        nz |= non_zero << n;
6087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
6097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
6107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
6117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (n = 0; n < 16; ++n) {
6127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], 1, &dqm->y1_) << n;
6137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
6147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
6157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Transform back
6177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8ITransformWHT(dc_tmp, tmp[0]);
618466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora  for (n = 0; n < 16; n += 2) {
619466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora    VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
6207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
6217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return nz;
6237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
6247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ReconstructIntra4(VP8EncIterator* const it,
6267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             int16_t levels[16],
6277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             const uint8_t* const src,
6287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             uint8_t* const yuv_out,
6297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                             int mode) {
6307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8Encoder* const enc = it->enc_;
6317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
6327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
6337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
6347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int16_t tmp[16];
6357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8FTransform(src, ref, tmp);
6377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (DO_TRELLIS_I4 && it->do_trellis_) {
6387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int x = it->i4_ & 3, y = it->i4_ >> 2;
6397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const int ctx = it->top_nz_[x] + it->left_nz_[y];
6407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    nz = TrellisQuantizeBlock(it, tmp, levels, ctx, 3, &dqm->y1_,
6417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              dqm->lambda_trellis_i4_);
6427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
6437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_);
6447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
645466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora  VP8ITransform(ref, tmp, yuv_out, 0);
6467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return nz;
6477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
6487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
6507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                         uint8_t* const yuv_out, int mode) {
6517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8Encoder* const enc = it->enc_;
6527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
6537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + U_OFF;
6547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
6557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
6567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int n;
6577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int16_t tmp[8][16];
6587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (n = 0; n < 8; ++n) {
6607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8FTransform(src + VP8Scan[16 + n], ref + VP8Scan[16 + n], tmp[n]);
6617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
6627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (DO_TRELLIS_UV && it->do_trellis_) {
6637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int ch, x, y;
6647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (ch = 0, n = 0; ch <= 2; ch += 2) {
6657c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      for (y = 0; y < 2; ++y) {
6667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        for (x = 0; x < 2; ++x, ++n) {
6677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
6687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          const int non_zero =
669466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora            TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2,
670466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora                                 &dqm->uv_, dqm->lambda_trellis_uv_);
6717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
6727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          nz |= non_zero << n;
6737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        }
6747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
6757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
6767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
6777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (n = 0; n < 8; ++n) {
6787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], 0, &dqm->uv_) << n;
6797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
6807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
6817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
682466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora  for (n = 0; n < 8; n += 2) {
683466727975bcc57c0c5597bcd0747a2fe4777b303Vikas Arora    VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);
6847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
6857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return (nz << 16);
6867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
6877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
//------------------------------------------------------------------------------
// RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
// Pick the mode with the lowest RD-cost = lambda * Rate + 256 * Distortion.
6917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
// Exchanges the two pointers stored at 'a' and 'b'.
static void SwapPtr(uint8_t** a, uint8_t** b) {
  uint8_t* const old_a = *a;
  uint8_t* const old_b = *b;
  *a = old_b;
  *b = old_a;
}
6977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
6987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SwapOut(VP8EncIterator* const it) {
6997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
7007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
7017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
703a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const VP8Encoder* const enc = it->enc_;
7047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
7057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int lambda = dqm->lambda_i16_;
7067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int tlambda = dqm->tlambda_;
7077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + Y_OFF;
7087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8ModeScore rd16;
7097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int mode;
7107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->mode_i16 = -1;
7127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (mode = 0; mode < 4; ++mode) {
7137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF;  // scratch buffer
7147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int nz;
7157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Reconstruct
7177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
7187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Measure RD-score
7207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd16.D = VP8SSE16x16(src, tmp_dst);
7217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
7227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora            : 0;
7237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd16.R = VP8GetCostLuma16(it, &rd16);
7247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd16.R += VP8FixedCostsI16[mode];
7257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Since we always examine Intra16 first, we can overwrite *rd directly.
7277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SetRDScore(lambda, &rd16);
7287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (mode == 0 || rd16.score < rd->score) {
7297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      CopyScore(rd, &rd16);
7307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd->mode_i16 = mode;
7317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd->nz = nz;
7327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
7337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
7347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      SwapOut(it);
7357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
7367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
7377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
7387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetIntra16Mode(it, rd->mode_i16);
7397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
7407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
741a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
7427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// return the cost array corresponding to the surrounding prediction modes.
7447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic const uint16_t* GetCostModeI4(VP8EncIterator* const it,
745a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora                                     const uint8_t modes[16]) {
7467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int preds_w = it->enc_->preds_w_;
7477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int x = (it->i4_ & 3), y = it->i4_ >> 2;
7487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
7497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
7507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return VP8FixedCostsI4[top][left];
7517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
7527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
754a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const VP8Encoder* const enc = it->enc_;
7557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
7567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int lambda = dqm->lambda_i4_;
7577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int tlambda = dqm->tlambda_;
7587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
7597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
760a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  int total_header_bits = 0;
7617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8ModeScore rd_best;
7627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
763a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  if (enc->max_i4_header_bits_ == 0) {
764a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    return 0;
765a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  }
766a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
7677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  InitScore(&rd_best);
768a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  rd_best.score = 211;  // '211' is the value of VP8BitCost(0, 145)
7697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8IteratorStartI4(it);
7707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  do {
7717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8ModeScore rd_i4;
7727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int mode;
7737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    int best_mode = -1;
7747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const uint8_t* const src = src0 + VP8Scan[it->i4_];
7757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
7767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
7777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;    // scratch buffer.
7787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    InitScore(&rd_i4);
7807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8MakeIntra4Preds(it);
7817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    for (mode = 0; mode < NUM_BMODES; ++mode) {
7827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      VP8ModeScore rd_tmp;
7837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      int16_t tmp_levels[16];
7847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // Reconstruct
7867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.nz =
7877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
7887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      // Compute RD-score
7907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
7917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.SD =
7927c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
7937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                  : 0;
7947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
7957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd_tmp.R += mode_costs[mode];
7967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
7977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      SetRDScore(lambda, &rd_tmp);
7987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
7997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        CopyScore(&rd_i4, &rd_tmp);
8007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        best_mode = mode;
8017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        SwapPtr(&tmp_dst, &best_block);
8027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
8037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      }
8047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
8057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SetRDScore(dqm->lambda_mode_, &rd_i4);
8067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    AddScore(&rd_best, &rd_i4);
807a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    total_header_bits += mode_costs[best_mode];
808a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    if (rd_best.score >= rd->score ||
809a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora        total_header_bits > enc->max_i4_header_bits_) {
8107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      return 0;
8117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
8127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Copy selected samples if not in the right place already.
8137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (best_block != best_blocks + VP8Scan[it->i4_])
8147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
8157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd->modes_i4[it->i4_] = best_mode;
8167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
8177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } while (VP8IteratorRotateI4(it, best_blocks));
8187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // finalize state
8207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  CopyScore(rd, &rd_best);
8217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetIntra4Mode(it, rd->modes_i4);
8227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  SwapOut(it);
8237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
8247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return 1;   // select intra4x4 over intra16x16
8257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
8267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
827a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
8287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
830a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  const VP8Encoder* const enc = it->enc_;
8317c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
8327c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int lambda = dqm->lambda_uv_;
8337c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const uint8_t* const src = it->yuv_in_ + U_OFF;
8347c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF;  // scratch buffer
8357c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  uint8_t* const dst0 = it->yuv_out_ + U_OFF;
8367c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8ModeScore rd_best;
8377c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int mode;
8387c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8397c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->mode_uv = -1;
8407c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  InitScore(&rd_best);
8417c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  for (mode = 0; mode < 4; ++mode) {
8427c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8ModeScore rd_uv;
8437c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8447c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Reconstruct
8457c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
8467c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8477c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // Compute RD-score
8487c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.D  = VP8SSE16x8(src, tmp_dst);
8497c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.SD = 0;    // TODO: should we call TDisto? it tends to flatten areas.
8507c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.R  = VP8GetCostUV(it, &rd_uv);
8517c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    rd_uv.R += VP8FixedCostsUV[mode];
8527c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8537c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SetRDScore(lambda, &rd_uv);
8547c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (mode == 0 || rd_uv.score < rd_best.score) {
8557c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      CopyScore(&rd_best, &rd_uv);
8567c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      rd->mode_uv = mode;
8577c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
8587c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      memcpy(dst0, tmp_dst, UV_SIZE);   //  TODO: SwapUVOut() ?
8597c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
8607c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
8617c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetIntraUVMode(it, rd->mode_uv);
8627c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  AddScore(rd, &rd_best);
8637c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
8647c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
865a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
8667c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Final reconstruction and quantization.
8677c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8687c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arorastatic void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
8697c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const VP8Encoder* const enc = it->enc_;
8707c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  const int i16 = (it->mb_->type_ == 1);
8717c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int nz = 0;
8727c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8737c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (i16) {
8747c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
8757c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
8767c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    VP8IteratorStartI4(it);
8777c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    do {
8787c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      const int mode =
8797c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
8807c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
8817c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
8827c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      VP8MakeIntra4Preds(it);
8837c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
8847c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora                              src, dst, mode) << it->i4_;
8857c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
8867c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
8877c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8887c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
8897c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  rd->nz = nz;
8907c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
8917c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
892a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
8937c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora// Entry point
8947c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8957c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Aroraint VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
8967c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  int is_skipped;
8977c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
8987c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  InitScore(rd);
8997c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9007c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // We can perform predictions for Luma16x16 and Chroma8x8 already.
9017c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // Luma4x4 predictions needs to be done as-we-go.
9027c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8MakeLuma16Preds(it);
9037c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8MakeChroma8Preds(it);
9047c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9057c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // for rd_opt = 2, we perform trellis-quant on the final decision only.
9067c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  // for rd_opt > 2, we use it for every scoring (=much slower).
9077c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  if (rd_opt > 0) {
9087c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    it->do_trellis_ = (rd_opt > 2);
9097c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    PickBestIntra16(it, rd);
9107c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (it->enc_->method_ >= 2) {
9117c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      PickBestIntra4(it, rd);
9127c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
9137c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    PickBestUV(it, rd);
9147c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    if (rd_opt == 2) {
9157c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      it->do_trellis_ = 1;
9167c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora      SimpleQuantize(it, rd);
9177c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    }
9187c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  } else {
9197c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    // TODO: for method_ == 2, pick the best intra4/intra16 based on SSE
9207c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    it->do_trellis_ = (it->enc_->method_ == 2);
9217c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora    SimpleQuantize(it, rd);
9227c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  }
9237c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  is_skipped = (rd->nz == 0);
9247c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  VP8SetSkip(it, is_skipped);
9257c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora  return is_skipped;
9267c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}
9277c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora
9287c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#if defined(__cplusplus) || defined(c_plusplus)
9297c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora}    // extern "C"
9307c970a0a679089e416c5887cf7fcece15a70bfa4Vikas Arora#endif
931