ih264e_core_coding.h revision 8d3d303c7942ced6a987a52db8977d768dc3605f
1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20
21/**
22******************************************************************************
23* @file
24*  ih264e_core_coding.h
25*
26* @brief
27*  This file contains extern declarations of core coding routines
28*
29* @author
30*  ittiam
31*
32* @remarks
33*  none
34******************************************************************************
35*/
36
37#ifndef IH264E_CORE_CODING_H_
38#define IH264E_CORE_CODING_H_
39
40/*****************************************************************************/
41/* Constant Macros                                                           */
42/*****************************************************************************/
43
/**
******************************************************************************
 *  @brief      Enable/Disable Hadamard transform of DC Coeff's
 *
 *  Symbolic names for a DC-transform on/off flag: 0 disables and 1 enables
 *  the Hadamard transform of the DC coefficients. The values match the 0/1
 *  encoding documented for the u4_dc_flag argument of
 *  ih264e_luma_16x16_resi_trans_dctrans_quant() in this header.
******************************************************************************
 */
#define DISABLE_DC_TRANSFORM 0
#define ENABLE_DC_TRANSFORM 1
51
/**
*******************************************************************************
 *  @brief block counts, 4x4 block dimensions and bit masks for the DC and AC
 *         control flags
 *
 *  The CNTRL_FLAG_* masks select fields of a 32 bit control word (see the
 *  u4_cntrl / pu4_cntrl arguments documented in this header).
*******************************************************************************
 */

/* Counts of coefficients / 4x4 blocks per macroblock. A 16x16 luma block    */
/* holds 16 4x4 blocks (4 per row), hence one DC coefficient per block.      */
/* NOTE(review): 8 chroma blocks is presumably 4 per plane for U and V -     */
/* confirm against the implementation.                                       */
#define DC_COEFF_CNT_LUMA_MB        16
#define NUM_4X4_BLKS_LUMA_MB_ROW    4
#define NUM_LUMA4x4_BLOCKS_IN_MB    16
#define NUM_CHROMA4x4_BLOCKS_IN_MB  8

/* Width and height of a 4x4 block; both alias TRANS_SIZE_4, which is        */
/* defined outside this header.                                              */
#define SIZE_4X4_BLK_HRZ            TRANS_SIZE_4
#define SIZE_4X4_BLK_VERT           TRANS_SIZE_4

/* Luma: DC mask selects bits 15..0, AC mask selects bits 31..16.            */
/* NOTE(review): the u4_cntrl description of                                 */
/* ih264e_luma_16x16_idctrans_iquant_itrans_recon() mentions a single DC     */
/* bit, while this mask covers the whole lower half word - confirm.          */
#define CNTRL_FLAG_DC_MASK_LUMA     0x0000FFFF
#define CNTRL_FLAG_AC_MASK_LUMA     0xFFFF0000

/* Chroma U plane: AC mask selects bits 31..28, DC mask selects bits 15..12  */
#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000
#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000

/* Chroma V plane: AC mask selects bits 27..24, DC mask selects bits 11..8   */
#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000
#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00

/* Union of the U and V plane masks: AC -> bits 31..24, DC -> bits 15..8     */
#define CNTRL_FLAG_AC_MASK_CHROMA   ( CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V )
#define CNTRL_FLAG_DC_MASK_CHROMA   ( CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V )

/* Selects bits 15 and 14 only. The u4_cntrl description of                  */
/* ih264e_chroma_8x8_idctrans_iquant_itrans_recon() assigns bit 15 to the    */
/* U plane DC block and bit 14 to the V plane DC block.                      */
#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000
79
80/**
81*******************************************************************************
82 *  @brief macros for transforms
83*******************************************************************************
84 */
/**
*******************************************************************************
 *  @brief Removes the leading set bit of a control word and reports its index
 *
 *  blk_lin_id receives CLZ(u4_cntrl); u4_cntrl is then ANDed with
 *  (0x7FFFFFFF >> blk_lin_id), which clears bit 31 down to and including
 *  bit (31 - blk_lin_id).
 *
 *  Both arguments must be modifiable lvalues and each is evaluated more than
 *  once, so they must not carry side effects.
 *
 *  NOTE(review): CLZ() is defined outside this header; this assumes it returns
 *  the number of leading zero bits of a 32 bit word. u4_cntrl is presumably
 *  expected to be non-zero on entry (a shift count of 32 would be undefined
 *  behaviour) - confirm against the callers.
 *
 *  The expansion is a plain compound statement without a trailing semicolon,
 *  like IND2SUB_LUMA_MB and IND2SUB_CHROMA_MB.
*******************************************************************************
 */
#define DEQUEUE_BLKID_FROM_CONTROL(u4_cntrl, blk_lin_id)                       \
{                                                                              \
    (blk_lin_id) = CLZ(u4_cntrl);                                              \
    (u4_cntrl) &= (0x7FFFFFFF >> (blk_lin_id));                                \
}
90
/**
*******************************************************************************
 *  @brief Converts a linear luma 4x4 block index into x / y offsets
 *
 *  i4_offset_x = (u4_blk_id % 4) * 4 and i4_offset_y = (u4_blk_id / 4) * 4,
 *  i.e. the index is read as column + 4 * row on a grid that is 4 blocks wide
 *  and each block step is worth 4 units.
 *
 *  u4_blk_id is evaluated twice and may be any integer expression (it is
 *  parenthesized inside the expansion); i4_offset_x and i4_offset_y must be
 *  modifiable lvalues.
*******************************************************************************
 */
#define IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y)                   \
{                                                                              \
    (i4_offset_x) = ((u4_blk_id) % 4) << 2;                                    \
    (i4_offset_y) = ((u4_blk_id) / 4) << 2;                                    \
}
96
/**
*******************************************************************************
 *  @brief Converts a linear chroma 4x4 block index into x / y offsets
 *
 *  i4_offset_x = 8 when bit 0 of u4_blk_id is set (0 otherwise), plus 1 when
 *  u4_blk_id is greater than 3.
 *  i4_offset_y = 4 when bit 1 of u4_blk_id is set (0 otherwise).
 *
 *  NOTE(review): this looks like addressing into the interleaved U/V layout
 *  described elsewhere in this header (indices 0..3 for one plane, 4..7 for
 *  the other, shifted by one sample) - confirm against the callers.
 *
 *  u4_blk_id is evaluated several times and may be any integer expression (it
 *  is parenthesized inside the expansion); i4_offset_x and i4_offset_y must be
 *  modifiable lvalues.
*******************************************************************************
 */
#define IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y)                 \
{                                                                              \
    (i4_offset_x) = (((u4_blk_id) & 0x1) << 3) + ((u4_blk_id) > 3);            \
    (i4_offset_y) = ((u4_blk_id) & 0x2) << 1;                                  \
}
102
103
104/*****************************************************************************/
105/* Function Declarations                                                     */
106/*****************************************************************************/
107
108/**
109*******************************************************************************
110*
111* @brief
112*  This function performs does the DCT transform then Hadamard transform
113*  and quantization for a macroblock when the mb mode is intra 16x16 mode
114*
115* @par Description:
116*  First  cf4 is done on all 16 4x4 blocks of the 16x16 input block.
117*  Then hadamard transform is done on the DC coefficients
118*  Quantization is then performed on the 16x16 block, 4x4 wise
119*
120* @param[in] pu1_src
121*  Pointer to source sub-block
122*
123* @param[in] pu1_pred
124*  Pointer to prediction sub-block
125*
126* @param[in] pi2_out
127*  Pointer to residual sub-block
128*  The output will be in linear format
129*  The first 16 continuous locations will contain the values of Dc block
130*  After DC block and a stride 1st AC block will follow
131*  After one more stride next AC block will follow
132*  The blocks will be in raster scan order
133*
134* @param[in] src_strd
135*  Source stride
136*
137* @param[in] pred_strd
138*  Prediction stride
139*
140* @param[in] dst_strd
141*  Destination stride
142*
143* @param[in] pu2_scale_matrix
144*  The quantization matrix for 4x4 transform
145*
146* @param[in] pu2_threshold_matrix
147*  Threshold matrix
148*
149* @param[in] u4_qbits
150*  15+QP/6
151*
152* @param[in] u4_round_factor
153*  Round factor for quant
154*
155* @param[out] pu1_nnz
156*  Memory to store the non-zeros after transform
157*  The first byte will be the nnz of DC block
158*  From the next byte the AC nnzs will be stored in raster scan order
159*
160* @param u4_dc_flag
161*  Signals if Dc transform is to be done or not
162*   1 -> Dc transform will be done
163*   0 -> Dc transform will not be done
164*
165* @remarks
166*
167*******************************************************************************
168*/
169void ih264e_luma_16x16_resi_trans_dctrans_quant(
170                codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
171                WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
172                WORD32 dst_strd, const UWORD16 *pu2_scale_matrix,
173                const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
174                UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag);
175
176/**
177*******************************************************************************
178*
179* @brief
180*  This function performs the intra 16x16 inverse transform process for H264
181*  it includes inverse Dc transform, inverse quant and then inverse transform
182*
183* @par Description:
184*
185* @param[in] pi2_src
186*  Input data, 16x16 size
187*  First 16 mem locations will have the Dc coffs in rater scan order in linear fashion
188*  after a stride 1st AC clock will be present again in raster can order
189*  Then each AC block of the 16x16 block will follow in raster scan order
190*
191* @param[in] pu1_pred
192*  The predicted data, 16x16 size
193*  Block by block form
194*
195* @param[in] pu1_out
196*  Output 16x16
197*  In block by block form
198*
199* @param[in] src_strd
200*  Source stride
201*
202* @param[in] pred_strd
203*  input stride for prediction buffer
204*
205* @param[in] out_strd
206*  input stride for output buffer
207*
208* @param[in] pu2_iscale_mat
209*  Inverse quantization matrix for 4x4 transform
210*
211* @param[in] pu2_weigh_mat
212*  weight matrix of 4x4 transform
213*
214* @param[in] qp_div
215*  QP/6
216*
217* @param[in] pi4_tmp
218*  Input temporary buffer
219*  needs to be at least 20 in size
220*
221* @param[in] pu4_cntrl
222*  Controls the transform path
223*  total Last 17 bits are used
224*  the 16th th bit will correspond to DC block
225*  and 32-17 will correspond to the ac blocks in raster scan order
226*  bit equaling zero indicates that the entire 4x4 block is zero for DC
227*  For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero
228*
229* @param[in] pi4_tmp
230*  Input temporary buffer
231*  needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
232*
233* @returns
234*  none
235*
236* @remarks
237*  The all zero case must be taken care outside
238*
239*******************************************************************************
240*/
241void ih264e_luma_16x16_idctrans_iquant_itrans_recon(
242                codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
243                UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
244                WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
245                const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
246                UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp);
247
248/**
249*******************************************************************************
250*
251* @brief
252*  This function performs does the DCT transform then Hadamard transform
253*  and quantization for a chroma macroblock
254*
255* @par Description:
256*  First  cf4 is done on all 16 4x4 blocks of the 8x8input block
257*  Then hadamard transform is done on the DC coefficients
258*  Quantization is then performed on the 8x8 block, 4x4 wise
259*
260* @param[in] pu1_src
261*  Pointer to source sub-block
262*  The input is in interleaved format for two chroma planes
263*
264* @param[in] pu1_pred
265*  Pointer to prediction sub-block
266*  Prediction is in inter leaved format
267*
268* @param[in] pi2_out
269*  Pointer to residual sub-block
270*  The output will be in linear format
271*  The first 4 continuous locations will contain the values of DC block for U
272*  and then next 4 will contain for V.
273*  After DC block and a stride 1st AC block of U plane will follow
274*  After one more stride next AC block of V plane will follow
275*  The blocks will be in raster scan order
276*
277*  After all the AC blocks of U plane AC blocks of V plane will follow in exact
278*  same way
279*
280* @param[in] src_strd
281*  Source stride
282*
283* @param[in] pred_strd
284*  Prediction stride
285*
286* @param[in] dst_strd
287*  Destination stride
288*
289* @param[in] pu2_scale_matrix
290*  The quantization matrix for 4x4 transform
291*
292* @param[in] pu2_threshold_matrix
293*  Threshold matrix
294*
295* @param[in] u4_qbits
296*  15+QP/6
297*
298* @param[in] u4_round_factor
299*  Round factor for quant
300*
301* @param[out] pu1_nnz
302*  Memory to store the non-zeros after transform
303*  The first byte will be the nnz od DC block for U plane
304*  From the next byte the AC nnzs will be storerd in raster scan order
305*  The fifth byte will be nnz of Dc block of V plane
306*  Then Ac blocks will follow
307*
308* @param u4_dc_flag
309*  Signals if Dc transform is to be done or not
310*   1 -> Dc transform will be done
311*   0 -> Dc transform will not be done
312*
313* @remarks
314*
315*******************************************************************************
316*/
317void ih264e_chroma_8x8_resi_trans_dctrans_quant(
318                codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
319                WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
320                WORD32 out_strd, const UWORD16 *pu2_scale_matrix,
321                const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
322                UWORD32 u4_round_factor, UWORD8 *pu1_nnz_c);
323
324/**
325*******************************************************************************
326* @brief
327*  This function performs the inverse transform with process for chroma MB of H264
328*
329* @par Description:
330*  Does inverse DC transform ,inverse quantization inverse transform
331*
332* @param[in] pi2_src
333*  Input data, 16x16 size
334*  The input is in the form of, first 4 locations will contain DC coeffs of
335*  U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
336*  in raster scan order will follow, each block as linear array in raster scan order.
337*  After a stride next AC block will follow. After all AC blocks of U plane
338*  V plane AC blocks will follow in exact same order.
339*
340* @param[in] pu1_pred
341*  The predicted data, 8x16 size, U and V interleaved
342*
343* @param[in] pu1_out
344*  Output 8x16, U and V interleaved
345*
346* @param[in] src_strd
347*  Source stride
348*
349* @param[in] pred_strd
350*  input stride for prediction buffer
351*
352* @param[in] out_strd
353*  input stride for output buffer
354*
355* @param[in] pu2_iscale_mat
356*  Inverse quantization martix for 4x4 transform
357*
358* @param[in] pu2_weigh_mat
359*  weight matrix of 4x4 transform
360*
361* @param[in] qp_div
362*  QP/6
363*
364* @param[in] pi4_tmp
365*  Input temporary buffer
366*  needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes
367*  in size
368*
369* @param[in] pu4_cntrl
370*  Controls the transform path
371*  the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block
372*  32-28 bits will indicate AC blocks of U plane in raster scan order
373*  27-23 bits will indicate AC blocks of V plane in rater scan order
374*  The bit 1 implies that there is at least one non zero coff in a block
375*
376* @returns
377*  none
378*
379* @remarks
380*******************************************************************************
381*/
382void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
383                codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
384                UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
385                WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
386                const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
387                WORD32 *pi4_tmp);
388
389/**
390******************************************************************************
391*
392* @brief  This function packs residue of an i16x16 luma mb for entropy coding
393*
394* @par   Description
395*  An i16 macro block contains two classes of units, dc 4x4 block and
396*  4x4 ac blocks. while packing the mb, the dc block is sent first, and
397*  the 16 ac blocks are sent next in scan order. Each and every block is
398*  represented by 3 parameters (nnz, significant coefficient map and the
399*  residue coefficients itself). If a 4x4 unit does not have any coefficients
400*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
401*  sent in scan order.
402*
403*  The first byte of each block will be nnz of the block, if it is non zero,
404*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
405*  This is repeated for 1 dc + 16 ac blocks.
406*
407* @param[in]  pi2_res_mb
408*  pointer to residue mb
409*
410* @param[in, out]  pv_mb_coeff_data
411*  buffer pointing to packed residue coefficients
412*
413* @param[in]  u4_res_strd
414*  residual block stride
415*
416* @param[out]  u1_cbp_l
417*  coded block pattern luma
418*
419* @param[in]   pu1_nnz
420*  number of non zero coefficients in each 4x4 unit
421*
422* @param[out]
423*  Control signal for inverse transform of 16x16 blocks
424*
425* @return none
426*
427* @ remarks
428*
429******************************************************************************
430*/
431void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
432                          WORD32 i4_res_strd, UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz,
433                          UWORD32 *pu4_cntrl);
434
435/**
436******************************************************************************
437*
438* @brief  This function packs residue of an i8x8 chroma mb for entropy coding
439*
440* @par   Description
441*  An i8 chroma macro block contains two classes of units, dc 2x2 block and
442*  4x4 ac blocks. while packing the mb, the dc block is sent first, and
443*  the 4 ac blocks are sent next in scan order. Each and every block is
444*  represented by 3 parameters (nnz, significant coefficient map and the
445*  residue coefficients itself). If a 4x4 unit does not have any coefficients
446*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
447*  sent in scan order.
448*
449*  The first byte of each block will be nnz of the block, if it is non zero,
450*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
451*  This is repeated for 1 dc + 4 ac blocks.
452*
453* @param[in]  pi2_res_mb
454*  pointer to residue mb
455*
456* @param[in, out]  pv_mb_coeff_data
457*  buffer pointing to packed residue coefficients
458*
459* @param[in]  u4_res_strd
460*  residual block stride
461*
462* @param[out]  u1_cbp_c
463*  coded block pattern chroma
464*
465* @param[in]   pu1_nnz
466*  number of non zero coefficients in each 4x4 unit
467*
468* @param[out]   pu1_nnz
469*  Control signal for inverse transform
470*
471* @param[in]   u4_swap_uv
472*  Swaps the order of U and V planes in entropy bitstream
473*
474* @return none
475*
476* @ remarks
477*
478******************************************************************************
479*/
480void ih264e_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
481                      WORD32 i4_res_strd, UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz,
482                      UWORD32 u4_kill_coffs_flag, UWORD32 *pu4_cntrl,
483                      UWORD32 u4_swap_uv);
484
485/**
486*******************************************************************************
487*
488* @brief performs luma core coding when intra mode is i16x16
489*
490* @par Description:
491*  If the current mb is to be coded as intra of mb type i16x16, the mb is first
492*  predicted using one of i16x16 prediction filters, basing on the intra mode
493*  chosen. Then, error is computed between the input blk and the estimated blk.
494*  This error is transformed (hierarchical transform i.e., dct followed by hada-
495*  -mard), quantized. The quantized coefficients are packed in scan order for
496*  entropy coding.
497*
498* @param[in] ps_proc_ctxt
499*  pointer to the current macro block context
500*
501* @returns u1_cbp_l
502*  coded block pattern luma
503*
504* @remarks none
505*
506*******************************************************************************
507*/
508UWORD8 ih264e_code_luma_intra_macroblock_16x16
509        (
510            process_ctxt_t *ps_proc
511        );
512
513/**
514*******************************************************************************
515*
516* @brief performs luma core coding when intra mode is i4x4
517*
518* @par Description:
519*  If the current mb is to be coded as intra of mb type i4x4, the mb is first
520*  predicted using one of i4x4 prediction filters, basing on the intra mode
521*  chosen. Then, error is computed between the input blk and the estimated blk.
522*  This error is dct transformed and quantized. The quantized coefficients are
523*  packed in scan order for entropy coding.
524*
525* @param[in] ps_proc_ctxt
526*  pointer to the current macro block context
527*
528* @returns u1_cbp_l
529*  coded block pattern luma
530*
531* @remarks
532*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
533*  mentioned in h.264 specification
534*
535*******************************************************************************
536*/
537UWORD8 ih264e_code_luma_intra_macroblock_4x4
538        (
539            process_ctxt_t *ps_proc
540        );
541
542/**
543*******************************************************************************
544*
545* @brief performs luma core coding when intra mode is i4x4
546*
547* @par Description:
548*  If the current mb is to be coded as intra of mb type i4x4, the mb is first
549*  predicted using one of i4x4 prediction filters, basing on the intra mode
550*  chosen. Then, error is computed between the input blk and the estimated blk.
551*  This error is dct transformed and quantized. The quantized coefficients are
552*  packed in scan order for entropy coding.
553*
554* @param[in] ps_proc_ctxt
555*  pointer to the current macro block context
556*
557* @returns u1_cbp_l
558*  coded block pattern luma
559*
560* @remarks
561*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
562*  mentioned in h.264 specification
563*
564*******************************************************************************
565*/
566UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on
567        (
568            process_ctxt_t *ps_proc
569        );
570
571/**
572*******************************************************************************
573*
574* @brief performs chroma core coding for intra macro blocks
575*
576* @par Description:
577*  If the current MB is to be intra coded with mb type chroma I8x8, the MB is
578*  first predicted using intra 8x8 prediction filters. The predicted data is
579*  compared with the input for error and the error is transformed. The DC
580*  coefficients of each transformed sub blocks are further transformed using
581*  Hadamard transform. The resulting coefficients are quantized, packed and sent
582*  for entropy coding.
583*
584* @param[in] ps_proc_ctxt
585*  pointer to the current macro block context
586*
587* @returns u1_cbp_c
588*  coded block pattern chroma
589*
590* @remarks
591*  The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
592*  mentioned in h.264 specification
593*
594*******************************************************************************
595*/
596UWORD8 ih264e_code_chroma_intra_macroblock_8x8
597        (
598            process_ctxt_t *ps_proc
599        );
600
601/**
602*******************************************************************************
603* @brief performs luma core coding when  mode is inter
604*
605* @par Description:
606*  If the current mb is to be coded as inter predicted mb,based on the sub mb
607*  partitions and corresponding motion vectors generated by ME, prediction is done.
608*  Then, error is computed between the input blk and the estimated blk.
609*  This error is transformed ( dct and with out hadamard), quantized. The
610*  quantized coefficients are packed in scan order for entropy coding.
611*
612* @param[in] ps_proc_ctxt
613*  pointer to the current macro block context
614*
615* @returns u1_cbp_l
616*  coded block pattern luma
617*
618* @remarks none
619*
620*******************************************************************************
621*/
622UWORD8 ih264e_code_luma_inter_macroblock_16x16
623        (
624            process_ctxt_t *ps_proc
625        );
626
627/**
628*******************************************************************************
629* @brief performs chroma core coding for inter macro blocks
630*
631* @par Description:
632*  If the current mb is to be coded as inter predicted mb, based on the sub mb
633*  partitions and corresponding motion vectors generated by ME, prediction is done.
634*  Then, error is computed between the input blk and the estimated blk.
635*  This error is transformed, quantized. The quantized coefficients
636*  are packed in scan order for entropy coding.
637*
638* @param[in] ps_proc_ctxt
639*  pointer to the current macro block context
640*
641* @returns u1_cbp_l
642*  coded block pattern luma
643*
644* @remarks none
645*
646*******************************************************************************
647*/
648UWORD8 ih264e_code_chroma_inter_macroblock_8x8
649        (
650            process_ctxt_t *ps_proc
651        );
652
653#endif /* IH264E_CORE_CODING_H_ */
654