// Copyright 2011 Google Inc.
//
// This code is licensed under the same terms as WebM:
//  Software License Agreement:  http://www.webmproject.org/license/software/
//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
//   WebP encoder: internal header.
//
// Author: Skal (pascal.massimino@gmail.com)

#ifndef WEBP_ENC_VP8ENCI_H_
#define WEBP_ENC_VP8ENCI_H_

#include "string.h"     // for memcpy()
#include "webp/encode.h"
#include "bit_writer.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

//-----------------------------------------------------------------------------
// Various defines and enums

// Encoder version numbers (major.minor.revision = 0.1.2).
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 1
#define ENC_REV_VERSION 2

// Size of the histogram used by CollectHistogram.
#define MAX_COEFF_THRESH   64

// Intra prediction modes.
// The 16x16-luma / chroma mode names at the bottom are aliases: they share
// their numeric values with the corresponding 4x4 modes.
enum { B_DC_PRED = 0,   // 4x4 modes
       B_TM_PRED = 1,
       B_VE_PRED = 2,
       B_HE_PRED = 3,
       B_RD_PRED = 4,
       B_VR_PRED = 5,
       B_LD_PRED = 6,
       B_VL_PRED = 7,
       B_HD_PRED = 8,
       B_HU_PRED = 9,
       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10

       // Luma16 or UV modes
       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
     };

// Dimensions of the various tables declared in this header.
enum { NUM_MB_SEGMENTS = 4,
       MAX_NUM_PARTITIONS = 8,
       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
       NUM_BANDS = 8,
       NUM_CTX = 3,
       NUM_PROBAS = 11,
       MAX_LF_LEVELS = 64,      // Maximum loop filter level
       MAX_VARIABLE_LEVEL = 67  // last (inclusive) level with variable cost
     };

// YUV-cache parameters. Cache is 16-pixels wide.
// The original or reconstructed samples can be accessed using VP8Scan[].
// The predicted blocks can be accessed using offsets to yuv_p_ and
// the arrays VP8*ModeOffsets[].
//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
67// Y_OFF |YYYY| <- original samples (enc->yuv_in_) 68// |YYYY| 69// |YYYY| 70// |YYYY| 71// U_OFF |UUVV| V_OFF (=U_OFF + 8) 72// |UUVV| 73// +----+ 74// Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_') 75// |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_') 76// |YYYY| 77// |YYYY| 78// U_OFF |UUVV| V_OFF 79// |UUVV| 80// x2 (for yuv_out2_) 81// +----+ Prediction area ('yuv_p_', size = PRED_SIZE) 82// I16DC16 |YYYY| Intra16 predictions (16x16 block each) 83// |YYYY| 84// |YYYY| 85// |YYYY| 86// I16TM16 |YYYY| 87// |YYYY| 88// |YYYY| 89// |YYYY| 90// I16VE16 |YYYY| 91// |YYYY| 92// |YYYY| 93// |YYYY| 94// I16HE16 |YYYY| 95// |YYYY| 96// |YYYY| 97// |YYYY| 98// +----+ Chroma U/V predictions (16x8 block each) 99// C8DC8 |UUVV| 100// |UUVV| 101// C8TM8 |UUVV| 102// |UUVV| 103// C8VE8 |UUVV| 104// |UUVV| 105// C8HE8 |UUVV| 106// |UUVV| 107// +----+ Intra 4x4 predictions (4x4 block each) 108// |YYYY| I4DC4 I4TM4 I4VE4 I4HE4 109// |YYYY| I4RD4 I4VR4 I4LD4 I4VL4 110// |YY..| I4HD4 I4HU4 I4TMP 111// +----+ 112#define BPS 16 // this is the common stride 113#define Y_SIZE (BPS * 16) 114#define UV_SIZE (BPS * 8) 115#define YUV_SIZE (Y_SIZE + UV_SIZE) 116#define PRED_SIZE (6 * 16 * BPS + 12 * BPS) 117#define Y_OFF (0) 118#define U_OFF (Y_SIZE) 119#define V_OFF (U_OFF + 8) 120#define ALIGN_CST 15 121#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) 122 123extern const int VP8Scan[16 + 4 + 4]; // in quant.c 124extern const int VP8UVModeOffsets[4]; // in analyze.c 125extern const int VP8I16ModeOffsets[4]; 126extern const int VP8I4ModeOffsets[NUM_BMODES]; 127 128// Layout of prediction blocks 129// intra 16x16 130#define I16DC16 (0 * 16 * BPS) 131#define I16TM16 (1 * 16 * BPS) 132#define I16VE16 (2 * 16 * BPS) 133#define I16HE16 (3 * 16 * BPS) 134// chroma 8x8, two U/V blocks side by side (hence: 16x8 each) 135#define C8DC8 (4 * 16 * BPS) 136#define C8TM8 (4 * 16 * BPS + 8 * BPS) 137#define C8VE8 (5 * 16 * BPS) 138#define C8HE8 (5 * 16 * BPS + 
8 * BPS) 139// intra 4x4 140#define I4DC4 (6 * 16 * BPS + 0) 141#define I4TM4 (6 * 16 * BPS + 4) 142#define I4VE4 (6 * 16 * BPS + 8) 143#define I4HE4 (6 * 16 * BPS + 12) 144#define I4RD4 (6 * 16 * BPS + 4 * BPS + 0) 145#define I4VR4 (6 * 16 * BPS + 4 * BPS + 4) 146#define I4LD4 (6 * 16 * BPS + 4 * BPS + 8) 147#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12) 148#define I4HD4 (6 * 16 * BPS + 8 * BPS + 0) 149#define I4HU4 (6 * 16 * BPS + 8 * BPS + 4) 150#define I4TMP (6 * 16 * BPS + 8 * BPS + 8) 151 152typedef int64_t score_t; // type used for scores, rate, distortion 153#define MAX_COST ((score_t)0x7fffffffffffffLL) 154 155#define QFIX 17 156#define BIAS(b) ((b) << (QFIX - 8)) 157// Fun fact: this is the _only_ line where we're actually being lossy and 158// discarding bits. 159static inline int QUANTDIV(int n, int iQ, int B) { 160 return (n * iQ + B) >> QFIX; 161} 162extern const uint8_t VP8Zigzag[16]; 163 164//----------------------------------------------------------------------------- 165// Headers 166 167typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; 168typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2]; 169typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; 170typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats 171 172typedef struct VP8Encoder VP8Encoder; 173 174// segment features 175typedef struct { 176 int num_segments_; // Actual number of segments. 1 segment only = unused. 177 int update_map_; // whether to update the segment map or not. 178 // must be 0 if there's only 1 segment. 179 int size_; // bit-cost for transmitting the segment map 180} VP8SegmentHeader; 181 182// Struct collecting all frame-persistent probabilities. 183typedef struct { 184 uint8_t segments_[3]; // probabilities for segment tree 185 uint8_t skip_proba_; // final probability of being skipped. 
186 ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes 187 StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 7.4k 188 CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k 189 int use_skip_proba_; // Note: we always use skip_proba for now. 190 int nb_skip_; // number of skipped blocks 191} VP8Proba; 192 193// Filter parameters. Not actually used in the code (we don't perform 194// the in-loop filtering), but filled from user's config 195typedef struct { 196 int simple_; // filtering type: 0=complex, 1=simple 197 int level_; // base filter level [0..63] 198 int sharpness_; // [0..7] 199 int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16 200} VP8FilterHeader; 201 202//----------------------------------------------------------------------------- 203// Informations about the macroblocks. 204 205typedef struct { 206 // block type 207 uint8_t type_:2; // 0=i4x4, 1=i16x16 208 uint8_t uv_mode_:2; 209 uint8_t skip_:1; 210 uint8_t segment_:2; 211 uint8_t alpha_; // quantization-susceptibility 212} VP8MBInfo; 213 214typedef struct { 215 uint16_t q_[16]; // quantizer steps 216 uint16_t iq_[16]; // reciprocals, fixed point. 217 uint16_t bias_[16]; // rounding bias 218 uint16_t zthresh_[16]; // value under which a coefficient is zeroed 219 uint16_t sharpen_[16]; // frequency boosters for slight sharpening 220} VP8Matrix; 221 222typedef struct { 223 VP8Matrix y1_, y2_, uv_; // quantization matrices 224 int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral. 225 // Lower values indicate a lower risk of blurriness. 226 int beta_; // filter-susceptibility, range [0,255]. 227 int quant_; // final segment quantizer. 
228 int fstrength_; // final in-loop filtering strength 229 // reactivities 230 int lambda_i16_, lambda_i4_, lambda_uv_; 231 int lambda_mode_, lambda_trellis_, tlambda_; 232 int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_; 233} VP8SegmentInfo; 234 235// Handy transcient struct to accumulate score and info during RD-optimization 236// and mode evaluation. 237typedef struct { 238 score_t D, SD, R, score; // Distortion, spectral distortion, rate, score. 239 int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma. 240 int16_t y_ac_levels[16][16]; 241 int16_t uv_levels[4 + 4][16]; 242 int mode_i16; // mode number for intra16 prediction 243 int modes_i4[16]; // mode numbers for intra4 predictions 244 int mode_uv; // mode number of chroma prediction 245 uint32_t nz; // non-zero blocks 246} VP8ModeScore; 247 248// Iterator structure to iterate through macroblocks, pointing to the 249// right neighbouring data (samples, predictions, contexts, ...) 250typedef struct { 251 int x_, y_; // current macroblock 252 int y_offset_, uv_offset_; // offset to the luma / chroma planes 253 int y_stride_, uv_stride_; // respective strides 254 uint8_t* yuv_in_; // borrowed from enc_ (for now) 255 uint8_t* yuv_out_; // '' 256 uint8_t* yuv_out2_; // '' 257 uint8_t* yuv_p_; // '' 258 VP8Encoder* enc_; // back-pointer 259 VP8MBInfo* mb_; // current macroblock 260 VP8BitWriter* bw_; // current bit-writer 261 uint8_t* preds_; // intra mode predictors (4x4 blocks) 262 uint32_t* nz_; // non-zero pattern 263 uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4 264 uint8_t* i4_top_; // pointer to the current top boundary sample 265 int i4_; // current intra4x4 mode being tested 266 int top_nz_[9]; // top-non-zero context. 267 int left_nz_[9]; // left-non-zero. left_nz[8] is independent. 268 uint64_t bit_count_[4][3]; // bit counters for coded levels. 
269 uint64_t luma_bits_; // macroblock bit-cost for luma 270 uint64_t uv_bits_; // macroblock bit-cost for chroma 271 LFStats* lf_stats_; // filter stats (borrowed from enc_) 272 int do_trellis_; // if true, perform extra level optimisation 273 int done_; // true when scan is finished 274} VP8EncIterator; 275 276 // in iterator.c 277// must be called first. 278void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it); 279// restart a scan. 280void VP8IteratorReset(VP8EncIterator* const it); 281// import samples from source 282void VP8IteratorImport(const VP8EncIterator* const it); 283// export decimated samples 284void VP8IteratorExport(const VP8EncIterator* const it); 285// go to next macroblock. Returns !done_. If *block_to_save is non-null, will 286// save the boundary values to top_/left_ arrays. block_to_save can be 287// it->yuv_out_ or it->yuv_in_. 288int VP8IteratorNext(VP8EncIterator* const it, 289 const uint8_t* const block_to_save); 290// Intra4x4 iterations 291void VP8IteratorStartI4(VP8EncIterator* const it); 292// returns true if not done. 
int VP8IteratorRotateI4(VP8EncIterator* const it,
                        const uint8_t* const yuv_out);

// Non-zero context setup/teardown
void VP8IteratorNzToBytes(VP8EncIterator* const it);
void VP8IteratorBytesToNz(VP8EncIterator* const it);

// Helper functions to set mode properties
void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]);
void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
void VP8SetSkip(const VP8EncIterator* const it, int skip);
void VP8SetSegment(const VP8EncIterator* const it, int segment);
void VP8IteratorResetCosts(VP8EncIterator* const it);

//-----------------------------------------------------------------------------
// VP8Encoder

// Main encoder state: configuration, headers, bit writers, per-segment
// quantization info, statistics and working buffers.
struct VP8Encoder {
  const WebPConfig* config_;  // user configuration and parameters
  WebPPicture* pic_;          // input / output picture

  // headers
  VP8FilterHeader   filter_hdr_;     // filtering information
  VP8SegmentHeader  segment_hdr_;    // segment information

  int profile_;                      // VP8's profile, deduced from Config.

  // dimension, in macroblock units.
  int mb_w_, mb_h_;
  int preds_w_;   // stride of the *preds_ prediction plane (=4*mb_w + 1)

  // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
  int num_parts_;

  // per-partition bit writers.
  VP8BitWriter bw_;                         // part0
  VP8BitWriter parts_[MAX_NUM_PARTITIONS];  // token partitions

  // transparency blob
  int has_alpha_;
  uint8_t* alpha_data_;       // non-NULL if transparency is present
  size_t alpha_data_size_;

  // enhancement layer
  int use_layer_;
  VP8BitWriter layer_bw_;
  uint8_t* layer_data_;
  size_t layer_data_size_;

  // quantization info (one set of DC/AC dequant factor per segment)
  VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
  int base_quant_;                 // nominal quantizer value. Only used
                                   // for relative coding of segments' quant.
  int uv_alpha_;                   // U/V quantization susceptibility
  // global offset of quantizers, shared by all segments
  int dq_y1_dc_;
  int dq_y2_dc_, dq_y2_ac_;
  int dq_uv_dc_, dq_uv_ac_;

  // probabilities and statistics
  VP8Proba proba_;
  uint64_t sse_[3];        // sum of Y/U/V squared errors for all macroblocks
  uint64_t sse_count_;     // pixel count for the sse_[] stats
  int      coded_size_;
  int      residual_bytes_[3][4];
  int      block_count_[3];

  // quality/speed settings
  int method_;              // 0=fastest, 6=best/slowest.
  int rd_opt_level_;        // Deduced from method_.

  // Memory
  VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
  uint8_t*   preds_;     // predictions modes: (4*mb_w+1) * (4*mb_h+1)
  uint32_t*  nz_;        // non-zero bit context: mb_w+1
  uint8_t*   yuv_in_;    // input samples
  uint8_t*   yuv_out_;   // output samples
  uint8_t*   yuv_out2_;  // secondary scratch out-buffer. swapped with yuv_out_.
  uint8_t*   yuv_p_;     // scratch buffer for prediction
  uint8_t   *y_top_;     // top luma samples.
  uint8_t   *uv_top_;    // top u/v samples.
                         // U and V are packed into 16 pixels (8 U + 8 V)
  uint8_t   *y_left_;    // left luma samples (addressable from index -1 to 15).
  uint8_t   *u_left_;    // left u samples (addressable from index -1 to 7)
  uint8_t   *v_left_;    // left v samples (addressable from index -1 to 7)

  LFStats   *lf_stats_;  // autofilter stats (if NULL, autofilter is off)
};

//-----------------------------------------------------------------------------
// internal functions. Not public.
// --- in tree.c ---
// Coefficient probability tables, indexed [type][band][context][proba].
extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
extern const uint8_t
    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
// Reset the token probabilities to their initial (default) values
void VP8DefaultProbas(VP8Encoder* const enc);
// Write the token probabilities
void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
// Writes the partition #0 modes (that is: all intra modes)
void VP8CodeIntraModes(VP8Encoder* const enc);

// --- in syntax.c ---
// Generates the final bitstream by coding the partition0 and headers,
// and appending an assembly of all the pre-coded token partitions.
// Return true if everything is ok.
int VP8EncWrite(VP8Encoder* const enc);

// --- in frame.c ---
extern const uint8_t VP8EncBands[16 + 1];
// Form all the four Intra16x16 predictions in the yuv_p_ cache
void VP8MakeLuma16Preds(const VP8EncIterator* const it);
// Form all the four Chroma8x8 predictions in the yuv_p_ cache
void VP8MakeChroma8Preds(const VP8EncIterator* const it);
// Form all the ten Intra4x4 predictions in the yuv_p_ cache
// for the 4x4 block it->i4_
void VP8MakeIntra4Preds(const VP8EncIterator* const it);
// Rate calculation
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
// Main stat / coding passes
int VP8EncLoop(VP8Encoder* const enc);
int VP8StatLoop(VP8Encoder* const enc);

// --- in webpenc.c ---
// Assign an error code to a picture. Return false for convenience.
int WebPEncodingSetError(WebPPicture* const pic, WebPEncodingError error);

// --- in analysis.c ---
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
                         int start_block, int end_block);
extern VP8CHisto VP8CollectHistogram;
// Main analysis loop. Decides the segmentations and complexity.
// Assigns a first guess for Intra16 and uvmode_ prediction modes.
int VP8EncAnalyze(VP8Encoder* const enc);

// --- in quant.c ---
// Sets up segment's quantization values, base_quant_ and filter strengths.
void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
// Picks the best modes and fills the levels. Returns true if skipped.
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);

// --- in alpha.c ---
void VP8EncInitAlpha(VP8Encoder* enc);           // initialize alpha compression
void VP8EncCodeAlphaBlock(VP8EncIterator* it);   // analyze or code a macroblock
int VP8EncFinishAlpha(VP8Encoder* enc);          // finalize compressed data
void VP8EncDeleteAlpha(VP8Encoder* enc);         // delete compressed data

// --- in layer.c ---
void VP8EncInitLayer(VP8Encoder* const enc);     // init everything
void VP8EncCodeLayerBlock(VP8EncIterator* it);   // code one more macroblock
int VP8EncFinishLayer(VP8Encoder* const enc);    // finalize coding
void VP8EncDeleteLayer(VP8Encoder* enc);         // reclaim memory

// --- in dsp.c ---
int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]);

// Transforms
// The transforms are reached through the function pointers declared below.
// VP8Idct: Does one or two inverse transforms. If do_two is set, the transforms
//          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                        int do_two);
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8WHT VP8ITransformWHT;
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top, *top_right and *left can be NULL.
typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
                              const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;

// Metrics comparing two blocks (pix vs. ref); the weighted variant takes an
// extra table of 16-bit weights.
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
                          const uint16_t* const weights);
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;

// Block copy from src to dst.
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy8x8;
extern VP8BlockCopy VP8Copy16x16;
// Quantization
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
                                int n, const VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock;

// CPU features that can be queried through VP8EncGetCPUInfo.
typedef enum {
  kSSE2,
  kSSE3
} CPUFeature;
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
extern VP8CPUInfo VP8EncGetCPUInfo;

void VP8EncDspInit(void);   // must be called before using any of the above

// --- in filter.c ---
extern void VP8InitFilter(VP8EncIterator* const it);
extern void VP8StoreFilterStats(VP8EncIterator* const it);
extern void VP8AdjustFilterStrength(VP8EncIterator* const it);

//-----------------------------------------------------------------------------

#if defined(__cplusplus) || defined(c_plusplus)
}    // extern "C"
#endif

#endif  // WEBP_ENC_VP8ENCI_H_