// Copyright 2011 Google Inc.
//
// This code is licensed under the same terms as WebM:
//  Software License Agreement:  http://www.webmproject.org/license/software/
//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
//   WebP encoder: internal header.
//
// Author: Skal (pascal.massimino@gmail.com)

12#ifndef WEBP_ENC_VP8ENCI_H_
13#define WEBP_ENC_VP8ENCI_H_
14
15#include "string.h"     // for memcpy()
16#include "webp/encode.h"
17#include "bit_writer.h"
18
19#if defined(__cplusplus) || defined(c_plusplus)
20extern "C" {
21#endif
22
//-----------------------------------------------------------------------------
// Various defines and enums

// Encoder version numbers (major / minor / revision).
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 1
#define ENC_REV_VERSION 2

// size of histogram used by CollectHistogram.
// (histogram arrays are declared with MAX_COEFF_THRESH + 1 entries)
#define MAX_COEFF_THRESH   64

// intra prediction modes
// The ten B_* values are the 4x4 modes. The un-prefixed names below are
// aliases onto four of them and are the modes used for Luma16 and UV
// prediction.
enum { B_DC_PRED = 0,   // 4x4 modes
       B_TM_PRED = 1,
       B_VE_PRED = 2,
       B_HE_PRED = 3,
       B_RD_PRED = 4,
       B_VR_PRED = 5,
       B_LD_PRED = 6,
       B_VL_PRED = 7,
       B_HD_PRED = 8,
       B_HU_PRED = 9,
       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10

       // Luma16 or UV modes
       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
     };

// Dimensions of the coding tables and misc. limits.
enum { NUM_MB_SEGMENTS = 4,
       MAX_NUM_PARTITIONS = 8,
       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
       NUM_BANDS = 8,
       NUM_CTX = 3,
       NUM_PROBAS = 11,
       MAX_LF_LEVELS = 64,      // Maximum loop filter level
       MAX_VARIABLE_LEVEL = 67  // last (inclusive) level with variable cost
     };

// YUV-cache parameters. Cache is 16-pixels wide.
// The original or reconstructed samples can be accessed using VP8Scan[]
// The predicted blocks can be accessed using offsets to yuv_p_ and
// the arrays VP8*ModeOffsets[];
//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
//  Y_OFF  |YYYY| <- original samples  (enc->yuv_in_)
//         |YYYY|
//         |YYYY|
//         |YYYY|
//  U_OFF  |UUVV| V_OFF  (=U_OFF + 8)
//         |UUVV|
//         +----+
//  Y_OFF  |YYYY| <- compressed/decoded samples  ('yuv_out_')
//         |YYYY|    There are two buffers like this ('yuv_out_'/'yuv_out2_')
//         |YYYY|
//         |YYYY|
//  U_OFF  |UUVV| V_OFF
//         |UUVV|
//          x2 (for yuv_out2_)
//         +----+     Prediction area ('yuv_p_', size = PRED_SIZE)
// I16DC16 |YYYY|  Intra16 predictions (16x16 block each)
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16TM16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16VE16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16HE16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
//         +----+  Chroma U/V predictions (16x8 block each)
// C8DC8   |UUVV|
//         |UUVV|
// C8TM8   |UUVV|
//         |UUVV|
// C8VE8   |UUVV|
//         |UUVV|
// C8HE8   |UUVV|
//         |UUVV|
//         +----+  Intra 4x4 predictions (4x4 block each)
//         |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
//         |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
//         |YY..| I4HD4 I4HU4 I4TMP
//         +----+
#define BPS       16   // this is the common stride
#define Y_SIZE   (BPS * 16)             // 16 rows of luma
#define UV_SIZE  (BPS * 8)              // 8 rows of U and V, side by side
#define YUV_SIZE (Y_SIZE + UV_SIZE)
// Six 16-row bands (four Intra16 + two holding the four chroma predictions)
// followed by 12 rows for the Intra4x4 predictions.
#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
#define Y_OFF    (0)
#define U_OFF    (Y_SIZE)
#define V_OFF    (U_OFF + 8)
#define ALIGN_CST 15
// Rounds the address PTR up to the next multiple of (ALIGN_CST + 1) bytes and
// returns it as a uintptr_t. The mask is built in uintptr_t so that it always
// spans the full pointer width, whatever the type of ALIGN_CST.
#define DO_ALIGN(PTR) \
    ((uintptr_t)((PTR) + ALIGN_CST) & ~(uintptr_t)ALIGN_CST)

123extern const int VP8Scan[16 + 4 + 4];           // in quant.c
124extern const int VP8UVModeOffsets[4];           // in analyze.c
125extern const int VP8I16ModeOffsets[4];
126extern const int VP8I4ModeOffsets[NUM_BMODES];
127
// Layout of prediction blocks
// All values are byte offsets into the prediction area (stride is BPS).
// intra 16x16
#define I16DC16 (0 * 16 * BPS)
#define I16TM16 (1 * 16 * BPS)
#define I16VE16 (2 * 16 * BPS)
#define I16HE16 (3 * 16 * BPS)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
#define C8DC8 (4 * 16 * BPS)
#define C8TM8 (4 * 16 * BPS + 8 * BPS)
#define C8VE8 (5 * 16 * BPS)
#define C8HE8 (5 * 16 * BPS + 8 * BPS)
// intra 4x4: four blocks per row of 16 pixels, one row of blocks every 4 lines
#define I4DC4 (6 * 16 * BPS +  0)
#define I4TM4 (6 * 16 * BPS +  4)
#define I4VE4 (6 * 16 * BPS +  8)
#define I4HE4 (6 * 16 * BPS + 12)
#define I4RD4 (6 * 16 * BPS + 4 * BPS +  0)
#define I4VR4 (6 * 16 * BPS + 4 * BPS +  4)
#define I4LD4 (6 * 16 * BPS + 4 * BPS +  8)
#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
#define I4HD4 (6 * 16 * BPS + 8 * BPS +  0)
#define I4HU4 (6 * 16 * BPS + 8 * BPS +  4)
#define I4TMP (6 * 16 * BPS + 8 * BPS +  8)

typedef int64_t score_t;     // type used for scores, rate, distortion
// Largest score value. Note that it is smaller than the int64_t maximum.
#define MAX_COST ((score_t)0x7fffffffffffffLL)

// Number of fractional bits of the fixed-point arithmetic used by QUANTDIV().
#define QFIX 17
// Shifts 'b' left by (QFIX - 8) bits.
// NOTE(review): presumably 'b' is a bias expressed with 8 fractional bits
// that gets rescaled to QFIX bits -- confirm against the callers.
#define BIAS(b)  ((b) << (QFIX - 8))
// Fun fact: this is the _only_ line where we're actually being lossy and
// discarding bits.
// Returns (n * iQ + B) >> QFIX, computed with plain 'int' arithmetic: the
// caller must make sure that n * iQ + B does not overflow.
//   n:  value to quantize
//   iQ: multiplier (NOTE(review): presumably a QFIX-bit fixed-point
//       reciprocal of the quantizer step, cf. VP8Matrix::iq_ -- confirm)
//   B:  rounding bias added before the shift
static inline int QUANTDIV(int n, int iQ, int B) {
  return (n * iQ + B) >> QFIX;
}
extern const uint8_t VP8Zigzag[16];

164//-----------------------------------------------------------------------------
165// Headers
166
167typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
168typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2];
169typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
170typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS];  // filter stats
171
172typedef struct VP8Encoder VP8Encoder;
173
// segment features
typedef struct {
  int num_segments_;      // Actual number of segments. 1 segment only = unused.
  int update_map_;        // whether to update the segment map or not.
                          // must be 0 if there's only 1 segment.
  int size_;              // bit-cost for transmitting the segment map
} VP8SegmentHeader;

182// Struct collecting all frame-persistent probabilities.
183typedef struct {
184  uint8_t segments_[3];     // probabilities for segment tree
185  uint8_t skip_proba_;      // final probability of being skipped.
186  ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];      // 924 bytes
187  StatsArray stats_[NUM_TYPES][NUM_BANDS];       // 7.4k
188  CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 11.4k
189  int use_skip_proba_;      // Note: we always use skip_proba for now.
190  int nb_skip_;             // number of skipped blocks
191} VP8Proba;
192
// Filter parameters. Not actually used in the code (we don't perform
// the in-loop filtering), but filled from user's config
typedef struct {
  int simple_;             // filtering type: 0=complex, 1=simple
  int level_;              // base filter level [0..63]
  int sharpness_;          // [0..7]
  int i4x4_lf_delta_;      // delta filter level for i4x4 relative to i16x16
} VP8FilterHeader;

//-----------------------------------------------------------------------------
// Information about the macroblocks.

// Per-macroblock coding decisions, packed into bit-fields.
typedef struct {
  // block type
  uint8_t type_:2;     // 0=i4x4, 1=i16x16
  uint8_t uv_mode_:2;  // chroma prediction mode
  uint8_t skip_:1;     // skip flag
  uint8_t segment_:2;  // segment index, in [0..3]
  uint8_t alpha_;      // quantization-susceptibility
} VP8MBInfo;

// Quantization parameters for the 16 coefficients of a 4x4 block.
typedef struct {
  uint16_t q_[16];        // quantizer steps
  uint16_t iq_[16];       // reciprocals, fixed point.
  uint16_t bias_[16];     // rounding bias
  uint16_t zthresh_[16];  // value under which a coefficient is zeroed
  uint16_t sharpen_[16];  // frequency boosters for slight sharpening
} VP8Matrix;

222typedef struct {
223  VP8Matrix y1_, y2_, uv_;  // quantization matrices
224  int alpha_;      // quant-susceptibility, range [-127,127]. Zero is neutral.
225                   // Lower values indicate a lower risk of blurriness.
226  int beta_;       // filter-susceptibility, range [0,255].
227  int quant_;      // final segment quantizer.
228  int fstrength_;  // final in-loop filtering strength
229  // reactivities
230  int lambda_i16_, lambda_i4_, lambda_uv_;
231  int lambda_mode_, lambda_trellis_, tlambda_;
232  int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
233} VP8SegmentInfo;
234
235// Handy transcient struct to accumulate score and info during RD-optimization
236// and mode evaluation.
237typedef struct {
238  score_t D, SD, R, score;    // Distortion, spectral distortion, rate, score.
239  int16_t y_dc_levels[16];    // Quantized levels for luma-DC, luma-AC, chroma.
240  int16_t y_ac_levels[16][16];
241  int16_t uv_levels[4 + 4][16];
242  int mode_i16;               // mode number for intra16 prediction
243  int modes_i4[16];           // mode numbers for intra4 predictions
244  int mode_uv;                // mode number of chroma prediction
245  uint32_t nz;                // non-zero blocks
246} VP8ModeScore;
247
248// Iterator structure to iterate through macroblocks, pointing to the
249// right neighbouring data (samples, predictions, contexts, ...)
250typedef struct {
251  int x_, y_;                      // current macroblock
252  int y_offset_, uv_offset_;       // offset to the luma / chroma planes
253  int y_stride_, uv_stride_;       // respective strides
254  uint8_t*      yuv_in_;           // borrowed from enc_ (for now)
255  uint8_t*      yuv_out_;          // ''
256  uint8_t*      yuv_out2_;         // ''
257  uint8_t*      yuv_p_;            // ''
258  VP8Encoder*   enc_;              // back-pointer
259  VP8MBInfo*    mb_;               // current macroblock
260  VP8BitWriter* bw_;               // current bit-writer
261  uint8_t*      preds_;            // intra mode predictors (4x4 blocks)
262  uint32_t*     nz_;               // non-zero pattern
263  uint8_t       i4_boundary_[37];  // 32+5 boundary samples needed by intra4x4
264  uint8_t*      i4_top_;           // pointer to the current top boundary sample
265  int           i4_;               // current intra4x4 mode being tested
266  int           top_nz_[9];        // top-non-zero context.
267  int           left_nz_[9];       // left-non-zero. left_nz[8] is independent.
268  uint64_t      bit_count_[4][3];  // bit counters for coded levels.
269  uint64_t      luma_bits_;        // macroblock bit-cost for luma
270  uint64_t      uv_bits_;          // macroblock bit-cost for chroma
271  LFStats*      lf_stats_;         // filter stats (borrowed from enc_)
272  int           do_trellis_;       // if true, perform extra level optimisation
273  int           done_;             // true when scan is finished
274} VP8EncIterator;
275
276  // in iterator.c
277// must be called first.
278void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
279// restart a scan.
280void VP8IteratorReset(VP8EncIterator* const it);
281// import samples from source
282void VP8IteratorImport(const VP8EncIterator* const it);
283// export decimated samples
284void VP8IteratorExport(const VP8EncIterator* const it);
285// go to next macroblock. Returns !done_. If *block_to_save is non-null, will
286// save the boundary values to top_/left_ arrays. block_to_save can be
287// it->yuv_out_ or it->yuv_in_.
288int VP8IteratorNext(VP8EncIterator* const it,
289                    const uint8_t* const block_to_save);
290// Intra4x4 iterations
291void VP8IteratorStartI4(VP8EncIterator* const it);
292// returns true if not done.
293int VP8IteratorRotateI4(VP8EncIterator* const it,
294                        const uint8_t* const yuv_out);
295
296// Non-zero context setup/teardown
297void VP8IteratorNzToBytes(VP8EncIterator* const it);
298void VP8IteratorBytesToNz(VP8EncIterator* const it);
299
300// Helper functions to set mode properties
301void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
302void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]);
303void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
304void VP8SetSkip(const VP8EncIterator* const it, int skip);
305void VP8SetSegment(const VP8EncIterator* const it, int segment);
306void VP8IteratorResetCosts(VP8EncIterator* const it);
307
308//-----------------------------------------------------------------------------
309// VP8Encoder
310
311struct VP8Encoder {
312  const WebPConfig* config_;    // user configuration and parameters
313  WebPPicture* pic_;            // input / output picture
314
315  // headers
316  VP8FilterHeader   filter_hdr_;     // filtering information
317  VP8SegmentHeader  segment_hdr_;    // segment information
318
319  int profile_;                      // VP8's profile, deduced from Config.
320
321  // dimension, in macroblock units.
322  int mb_w_, mb_h_;
323  int preds_w_;   // stride of the *preds_ prediction plane (=4*mb_w + 1)
324
325  // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
326  int num_parts_;
327
328  // per-partition boolean decoders.
329  VP8BitWriter bw_;                         // part0
330  VP8BitWriter parts_[MAX_NUM_PARTITIONS];  // token partitions
331
332  // transparency blob
333  int has_alpha_;
334  uint8_t* alpha_data_;       // non-NULL if transparency is present
335  size_t alpha_data_size_;
336
337  // enhancement layer
338  int use_layer_;
339  VP8BitWriter layer_bw_;
340  uint8_t* layer_data_;
341  size_t layer_data_size_;
342
343  // quantization info (one set of DC/AC dequant factor per segment)
344  VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
345  int base_quant_;                 // nominal quantizer value. Only used
346                                   // for relative coding of segments' quant.
347  int uv_alpha_;                   // U/V quantization susceptibility
348  // global offset of quantizers, shared by all segments
349  int dq_y1_dc_;
350  int dq_y2_dc_, dq_y2_ac_;
351  int dq_uv_dc_, dq_uv_ac_;
352
353  // probabilities and statistics
354  VP8Proba proba_;
355  uint64_t sse_[3];        // sum of Y/U/V squared errors for all macroblocks
356  uint64_t sse_count_;     // pixel count for the sse_[] stats
357  int      coded_size_;
358  int      residual_bytes_[3][4];
359  int      block_count_[3];
360
361  // quality/speed settings
362  int method_;             // 0=fastest, 6=best/slowest.
363  int rd_opt_level_;       // Deduced from method_.
364
365  // Memory
366  VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
367  uint8_t*   preds_;     // predictions modes: (4*mb_w+1) * (4*mb_h+1)
368  uint32_t*  nz_;        // non-zero bit context: mb_w+1
369  uint8_t*   yuv_in_;    // input samples
370  uint8_t*   yuv_out_;   // output samples
371  uint8_t*   yuv_out2_;  // secondary scratch out-buffer. swapped with yuv_out_.
372  uint8_t*   yuv_p_;     // scratch buffer for prediction
373  uint8_t   *y_top_;     // top luma samples.
374  uint8_t   *uv_top_;    // top u/v samples.
375                         // U and V are packed into 16 pixels (8 U + 8 V)
376  uint8_t   *y_left_;    // left luma samples (adressable from index -1 to 15).
377  uint8_t   *u_left_;    // left u samples (adressable from index -1 to 7)
378  uint8_t   *v_left_;    // left v samples (adressable from index -1 to 7)
379
380  LFStats   *lf_stats_;  // autofilter stats (if NULL, autofilter is off)
381};
382
383//-----------------------------------------------------------------------------
384// internal functions. Not public.
385
386  // in tree.c
387extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
388extern const uint8_t
389    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
390// Reset the token probabilities to their initial (default) values
391void VP8DefaultProbas(VP8Encoder* const enc);
392// Write the token probabilities
393void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
394// Writes the partition #0 modes (that is: all intra modes)
395void VP8CodeIntraModes(VP8Encoder* const enc);
396
397  // in syntax.c
398// Generates the final bitstream by coding the partition0 and headers,
399// and appending an assembly of all the pre-coded token partitions.
400// Return true if everything is ok.
401int VP8EncWrite(VP8Encoder* const enc);
402
403  // in frame.c
404extern const uint8_t VP8EncBands[16 + 1];
405// Form all the four Intra16x16 predictions in the yuv_p_ cache
406void VP8MakeLuma16Preds(const VP8EncIterator* const it);
407// Form all the four Chroma8x8 predictions in the yuv_p_ cache
408void VP8MakeChroma8Preds(const VP8EncIterator* const it);
409// Form all the ten Intra4x4 predictions in the yuv_p_ cache
410// for the 4x4 block it->i4_
411void VP8MakeIntra4Preds(const VP8EncIterator* const it);
412// Rate calculation
413int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
414int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
415int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
416// Main stat / coding passes
417int VP8EncLoop(VP8Encoder* const enc);
418int VP8StatLoop(VP8Encoder* const enc);
419
420  // in webpenc.c
421// Assign an error code to a picture. Return false for convenience.
422int WebPEncodingSetError(WebPPicture* const pic, WebPEncodingError error);
423  // in analysis.c
424// Compute susceptibility based on DCT-coeff histograms:
425// the higher, the "easier" the macroblock is to compress.
426typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
427                         int start_block, int end_block);
428extern VP8CHisto VP8CollectHistogram;
429// Main analysis loop. Decides the segmentations and complexity.
430// Assigns a first guess for Intra16 and uvmode_ prediction modes.
431int VP8EncAnalyze(VP8Encoder* const enc);
432
433  // in quant.c
434// Sets up segment's quantization values, base_quant_ and filter strengths.
435void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
436// Pick best modes and fills the levels. Returns true if skipped.
437int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
438
439  // in alpha.c
440void VP8EncInitAlpha(VP8Encoder* enc);           // initialize alpha compression
441void VP8EncCodeAlphaBlock(VP8EncIterator* it);   // analyze or code a macroblock
442int VP8EncFinishAlpha(VP8Encoder* enc);          // finalize compressed data
443void VP8EncDeleteAlpha(VP8Encoder* enc);         // delete compressed data
444
445  // in layer.c
446void VP8EncInitLayer(VP8Encoder* const enc);     // init everything
447void VP8EncCodeLayerBlock(VP8EncIterator* it);   // code one more macroblock
448int VP8EncFinishLayer(VP8Encoder* const enc);    // finalize coding
449void VP8EncDeleteLayer(VP8Encoder* enc);         // reclaim memory
450
451  // in dsp.c
452int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]);
453
454// Transforms
455// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
456//          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
457typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
458                        int do_two);
459typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
460typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
461extern VP8Idct VP8ITransform;
462extern VP8Fdct VP8FTransform;
463extern VP8WHT VP8ITransformWHT;
464extern VP8WHT VP8FTransformWHT;
465// Predictions
466// *dst is the destination block. *top, *top_right and *left can be NULL.
467typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
468                              const uint8_t* top);
469typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
470extern VP8Intra4Preds VP8EncPredLuma4;
471extern VP8IntraPreds VP8EncPredLuma16;
472extern VP8IntraPreds VP8EncPredChroma8;
473
474typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
475extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
476typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
477                          const uint16_t* const weights);
478extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
479
480typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
481extern VP8BlockCopy VP8Copy4x4;
482extern VP8BlockCopy VP8Copy8x8;
483extern VP8BlockCopy VP8Copy16x16;
484// Quantization
485typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
486                                int n, const VP8Matrix* const mtx);
487extern VP8QuantizeBlock VP8EncQuantizeBlock;
488
// CPU features that can be queried through VP8EncGetCPUInfo.
typedef enum {
  kSSE2,
  kSSE3
} CPUFeature;
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
extern VP8CPUInfo VP8EncGetCPUInfo;

void VP8EncDspInit(void);   // must be called before using any of the above

499  // in filter.c
500extern void VP8InitFilter(VP8EncIterator* const it);
501extern void VP8StoreFilterStats(VP8EncIterator* const it);
502extern void VP8AdjustFilterStrength(VP8EncIterator* const it);
503
504//-----------------------------------------------------------------------------
505
506#if defined(__cplusplus) || defined(c_plusplus)
507}    // extern "C"
508#endif
509
510#endif  // WEBP_ENC_VP8ENCI_H_
511