/* ih264e_core_coding.h revision 8d3d303c7942ced6a987a52db8977d768dc3605f */
1/****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*/ 20 21/** 22****************************************************************************** 23* @file 24* ih264e_core_coding.h 25* 26* @brief 27* This file contains extern declarations of core coding routines 28* 29* @author 30* ittiam 31* 32* @remarks 33* none 34****************************************************************************** 35*/ 36 37#ifndef IH264E_CORE_CODING_H_ 38#define IH264E_CORE_CODING_H_ 39 40/*****************************************************************************/ 41/* Constant Macros */ 42/*****************************************************************************/ 43 44/** 45****************************************************************************** 46 * @brief Enable/Disable Hadamard transform of DC Coeff's 47****************************************************************************** 48 */ 49#define DISABLE_DC_TRANSFORM 0 50#define ENABLE_DC_TRANSFORM 1 51 52/** 53******************************************************************************* 54 * @brief bit masks for DC and AC control flags 
55******************************************************************************* 56 */ 57 58#define DC_COEFF_CNT_LUMA_MB 16 59#define NUM_4X4_BLKS_LUMA_MB_ROW 4 60#define NUM_LUMA4x4_BLOCKS_IN_MB 16 61#define NUM_CHROMA4x4_BLOCKS_IN_MB 8 62 63#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4 64#define SIZE_4X4_BLK_VERT TRANS_SIZE_4 65 66#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF 67#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000 68 69#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000 70#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000 71 72#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000 73#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00 74 75#define CNTRL_FLAG_AC_MASK_CHROMA ( CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V ) 76#define CNTRL_FLAG_DC_MASK_CHROMA ( CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V ) 77 78#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000 79 80/** 81******************************************************************************* 82 * @brief macros for transforms 83******************************************************************************* 84 */ 85#define DEQUEUE_BLKID_FROM_CONTROL( u4_cntrl, blk_lin_id) \ 86{ \ 87 blk_lin_id = CLZ(u4_cntrl); \ 88 u4_cntrl &= (0x7FFFFFFF >> blk_lin_id); \ 89}; 90 91#define IND2SUB_LUMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \ 92{ \ 93 i4_offset_x = (u4_blk_id % 4) << 2; \ 94 i4_offset_y = (u4_blk_id / 4) << 2; \ 95} 96 97#define IND2SUB_CHROMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \ 98{ \ 99 i4_offset_x = ((u4_blk_id & 0x1 ) << 3) + (u4_blk_id > 3); \ 100 i4_offset_y = (u4_blk_id & 0x2) << 1; \ 101} 102 103 104/*****************************************************************************/ 105/* Function Declarations */ 106/*****************************************************************************/ 107 108/** 109******************************************************************************* 110* 111* @brief 112* This function performs does the DCT transform then Hadamard transform 113* and quantization for a 
macroblock when the mb mode is intra 16x16 mode 114* 115* @par Description: 116* First cf4 is done on all 16 4x4 blocks of the 16x16 input block. 117* Then hadamard transform is done on the DC coefficients 118* Quantization is then performed on the 16x16 block, 4x4 wise 119* 120* @param[in] pu1_src 121* Pointer to source sub-block 122* 123* @param[in] pu1_pred 124* Pointer to prediction sub-block 125* 126* @param[in] pi2_out 127* Pointer to residual sub-block 128* The output will be in linear format 129* The first 16 continuous locations will contain the values of Dc block 130* After DC block and a stride 1st AC block will follow 131* After one more stride next AC block will follow 132* The blocks will be in raster scan order 133* 134* @param[in] src_strd 135* Source stride 136* 137* @param[in] pred_strd 138* Prediction stride 139* 140* @param[in] dst_strd 141* Destination stride 142* 143* @param[in] pu2_scale_matrix 144* The quantization matrix for 4x4 transform 145* 146* @param[in] pu2_threshold_matrix 147* Threshold matrix 148* 149* @param[in] u4_qbits 150* 15+QP/6 151* 152* @param[in] u4_round_factor 153* Round factor for quant 154* 155* @param[out] pu1_nnz 156* Memory to store the non-zeros after transform 157* The first byte will be the nnz of DC block 158* From the next byte the AC nnzs will be stored in raster scan order 159* 160* @param u4_dc_flag 161* Signals if Dc transform is to be done or not 162* 1 -> Dc transform will be done 163* 0 -> Dc transform will not be done 164* 165* @remarks 166* 167******************************************************************************* 168*/ 169void ih264e_luma_16x16_resi_trans_dctrans_quant( 170 codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred, 171 WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, 172 WORD32 dst_strd, const UWORD16 *pu2_scale_matrix, 173 const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, 174 UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag); 175 176/** 
177******************************************************************************* 178* 179* @brief 180* This function performs the intra 16x16 inverse transform process for H264 181* it includes inverse Dc transform, inverse quant and then inverse transform 182* 183* @par Description: 184* 185* @param[in] pi2_src 186* Input data, 16x16 size 187* First 16 mem locations will have the Dc coffs in rater scan order in linear fashion 188* after a stride 1st AC clock will be present again in raster can order 189* Then each AC block of the 16x16 block will follow in raster scan order 190* 191* @param[in] pu1_pred 192* The predicted data, 16x16 size 193* Block by block form 194* 195* @param[in] pu1_out 196* Output 16x16 197* In block by block form 198* 199* @param[in] src_strd 200* Source stride 201* 202* @param[in] pred_strd 203* input stride for prediction buffer 204* 205* @param[in] out_strd 206* input stride for output buffer 207* 208* @param[in] pu2_iscale_mat 209* Inverse quantization matrix for 4x4 transform 210* 211* @param[in] pu2_weigh_mat 212* weight matrix of 4x4 transform 213* 214* @param[in] qp_div 215* QP/6 216* 217* @param[in] pi4_tmp 218* Input temporary buffer 219* needs to be at least 20 in size 220* 221* @param[in] pu4_cntrl 222* Controls the transform path 223* total Last 17 bits are used 224* the 16th th bit will correspond to DC block 225* and 32-17 will correspond to the ac blocks in raster scan order 226* bit equaling zero indicates that the entire 4x4 block is zero for DC 227* For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero 228* 229* @param[in] pi4_tmp 230* Input temporary buffer 231* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size 232* 233* @returns 234* none 235* 236* @remarks 237* The all zero case must be taken care outside 238* 239******************************************************************************* 240*/ 241void ih264e_luma_16x16_idctrans_iquant_itrans_recon( 242 codec_t 
*ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred, 243 UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, 244 WORD32 out_strd, const UWORD16 *pu2_iscale_mat, 245 const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl, 246 UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp); 247 248/** 249******************************************************************************* 250* 251* @brief 252* This function performs does the DCT transform then Hadamard transform 253* and quantization for a chroma macroblock 254* 255* @par Description: 256* First cf4 is done on all 16 4x4 blocks of the 8x8input block 257* Then hadamard transform is done on the DC coefficients 258* Quantization is then performed on the 8x8 block, 4x4 wise 259* 260* @param[in] pu1_src 261* Pointer to source sub-block 262* The input is in interleaved format for two chroma planes 263* 264* @param[in] pu1_pred 265* Pointer to prediction sub-block 266* Prediction is in inter leaved format 267* 268* @param[in] pi2_out 269* Pointer to residual sub-block 270* The output will be in linear format 271* The first 4 continuous locations will contain the values of DC block for U 272* and then next 4 will contain for V. 
273* After DC block and a stride 1st AC block of U plane will follow 274* After one more stride next AC block of V plane will follow 275* The blocks will be in raster scan order 276* 277* After all the AC blocks of U plane AC blocks of V plane will follow in exact 278* same way 279* 280* @param[in] src_strd 281* Source stride 282* 283* @param[in] pred_strd 284* Prediction stride 285* 286* @param[in] dst_strd 287* Destination stride 288* 289* @param[in] pu2_scale_matrix 290* The quantization matrix for 4x4 transform 291* 292* @param[in] pu2_threshold_matrix 293* Threshold matrix 294* 295* @param[in] u4_qbits 296* 15+QP/6 297* 298* @param[in] u4_round_factor 299* Round factor for quant 300* 301* @param[out] pu1_nnz 302* Memory to store the non-zeros after transform 303* The first byte will be the nnz od DC block for U plane 304* From the next byte the AC nnzs will be storerd in raster scan order 305* The fifth byte will be nnz of Dc block of V plane 306* Then Ac blocks will follow 307* 308* @param u4_dc_flag 309* Signals if Dc transform is to be done or not 310* 1 -> Dc transform will be done 311* 0 -> Dc transform will not be done 312* 313* @remarks 314* 315******************************************************************************* 316*/ 317void ih264e_chroma_8x8_resi_trans_dctrans_quant( 318 codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred, 319 WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, 320 WORD32 out_strd, const UWORD16 *pu2_scale_matrix, 321 const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, 322 UWORD32 u4_round_factor, UWORD8 *pu1_nnz_c); 323 324/** 325******************************************************************************* 326* @brief 327* This function performs the inverse transform with process for chroma MB of H264 328* 329* @par Description: 330* Does inverse DC transform ,inverse quantization inverse transform 331* 332* @param[in] pi2_src 333* Input data, 16x16 size 334* The input is in the form of, first 4 locations will 
contain DC coeffs of 335* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane 336* in raster scan order will follow, each block as linear array in raster scan order. 337* After a stride next AC block will follow. After all AC blocks of U plane 338* V plane AC blocks will follow in exact same order. 339* 340* @param[in] pu1_pred 341* The predicted data, 8x16 size, U and V interleaved 342* 343* @param[in] pu1_out 344* Output 8x16, U and V interleaved 345* 346* @param[in] src_strd 347* Source stride 348* 349* @param[in] pred_strd 350* input stride for prediction buffer 351* 352* @param[in] out_strd 353* input stride for output buffer 354* 355* @param[in] pu2_iscale_mat 356* Inverse quantization martix for 4x4 transform 357* 358* @param[in] pu2_weigh_mat 359* weight matrix of 4x4 transform 360* 361* @param[in] qp_div 362* QP/6 363* 364* @param[in] pi4_tmp 365* Input temporary buffer 366* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes 367* in size 368* 369* @param[in] pu4_cntrl 370* Controls the transform path 371* the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block 372* 32-28 bits will indicate AC blocks of U plane in raster scan order 373* 27-23 bits will indicate AC blocks of V plane in rater scan order 374* The bit 1 implies that there is at least one non zero coff in a block 375* 376* @returns 377* none 378* 379* @remarks 380******************************************************************************* 381*/ 382void ih264e_chroma_8x8_idctrans_iquant_itrans_recon( 383 codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred, 384 UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd, 385 WORD32 out_strd, const UWORD16 *pu2_iscale_mat, 386 const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl, 387 WORD32 *pi4_tmp); 388 389/** 390****************************************************************************** 391* 392* @brief This function packs residue of 
an i16x16 luma mb for entropy coding 393* 394* @par Description 395* An i16 macro block contains two classes of units, dc 4x4 block and 396* 4x4 ac blocks. while packing the mb, the dc block is sent first, and 397* the 16 ac blocks are sent next in scan order. Each and every block is 398* represented by 3 parameters (nnz, significant coefficient map and the 399* residue coefficients itself). If a 4x4 unit does not have any coefficients 400* then only nnz is sent. Inside a 4x4 block the individual coefficients are 401* sent in scan order. 402* 403* The first byte of each block will be nnz of the block, if it is non zero, 404* a 2 byte significance map is sent. This is followed by nonzero coefficients. 405* This is repeated for 1 dc + 16 ac blocks. 406* 407* @param[in] pi2_res_mb 408* pointer to residue mb 409* 410* @param[in, out] pv_mb_coeff_data 411* buffer pointing to packed residue coefficients 412* 413* @param[in] u4_res_strd 414* residual block stride 415* 416* @param[out] u1_cbp_l 417* coded block pattern luma 418* 419* @param[in] pu1_nnz 420* number of non zero coefficients in each 4x4 unit 421* 422* @param[out] 423* Control signal for inverse transform of 16x16 blocks 424* 425* @return none 426* 427* @ remarks 428* 429****************************************************************************** 430*/ 431void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, 432 WORD32 i4_res_strd, UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz, 433 UWORD32 *pu4_cntrl); 434 435/** 436****************************************************************************** 437* 438* @brief This function packs residue of an i8x8 chroma mb for entropy coding 439* 440* @par Description 441* An i8 chroma macro block contains two classes of units, dc 2x2 block and 442* 4x4 ac blocks. while packing the mb, the dc block is sent first, and 443* the 4 ac blocks are sent next in scan order. 
Each and every block is 444* represented by 3 parameters (nnz, significant coefficient map and the 445* residue coefficients itself). If a 4x4 unit does not have any coefficients 446* then only nnz is sent. Inside a 4x4 block the individual coefficients are 447* sent in scan order. 448* 449* The first byte of each block will be nnz of the block, if it is non zero, 450* a 2 byte significance map is sent. This is followed by nonzero coefficients. 451* This is repeated for 1 dc + 4 ac blocks. 452* 453* @param[in] pi2_res_mb 454* pointer to residue mb 455* 456* @param[in, out] pv_mb_coeff_data 457* buffer pointing to packed residue coefficients 458* 459* @param[in] u4_res_strd 460* residual block stride 461* 462* @param[out] u1_cbp_c 463* coded block pattern chroma 464* 465* @param[in] pu1_nnz 466* number of non zero coefficients in each 4x4 unit 467* 468* @param[out] pu1_nnz 469* Control signal for inverse transform 470* 471* @param[in] u4_swap_uv 472* Swaps the order of U and V planes in entropy bitstream 473* 474* @return none 475* 476* @ remarks 477* 478****************************************************************************** 479*/ 480void ih264e_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, 481 WORD32 i4_res_strd, UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz, 482 UWORD32 u4_kill_coffs_flag, UWORD32 *pu4_cntrl, 483 UWORD32 u4_swap_uv); 484 485/** 486******************************************************************************* 487* 488* @brief performs luma core coding when intra mode is i16x16 489* 490* @par Description: 491* If the current mb is to be coded as intra of mb type i16x16, the mb is first 492* predicted using one of i16x16 prediction filters, basing on the intra mode 493* chosen. Then, error is computed between the input blk and the estimated blk. 494* This error is transformed (hierarchical transform i.e., dct followed by hada- 495* -mard), quantized. The quantized coefficients are packed in scan order for 496* entropy coding. 
497* 498* @param[in] ps_proc_ctxt 499* pointer to the current macro block context 500* 501* @returns u1_cbp_l 502* coded block pattern luma 503* 504* @remarks none 505* 506******************************************************************************* 507*/ 508UWORD8 ih264e_code_luma_intra_macroblock_16x16 509 ( 510 process_ctxt_t *ps_proc 511 ); 512 513/** 514******************************************************************************* 515* 516* @brief performs luma core coding when intra mode is i4x4 517* 518* @par Description: 519* If the current mb is to be coded as intra of mb type i4x4, the mb is first 520* predicted using one of i4x4 prediction filters, basing on the intra mode 521* chosen. Then, error is computed between the input blk and the estimated blk. 522* This error is dct transformed and quantized. The quantized coefficients are 523* packed in scan order for entropy coding. 524* 525* @param[in] ps_proc_ctxt 526* pointer to the current macro block context 527* 528* @returns u1_cbp_l 529* coded block pattern luma 530* 531* @remarks 532* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order 533* mentioned in h.264 specification 534* 535******************************************************************************* 536*/ 537UWORD8 ih264e_code_luma_intra_macroblock_4x4 538 ( 539 process_ctxt_t *ps_proc 540 ); 541 542/** 543******************************************************************************* 544* 545* @brief performs luma core coding when intra mode is i4x4 546* 547* @par Description: 548* If the current mb is to be coded as intra of mb type i4x4, the mb is first 549* predicted using one of i4x4 prediction filters, basing on the intra mode 550* chosen. Then, error is computed between the input blk and the estimated blk. 551* This error is dct transformed and quantized. The quantized coefficients are 552* packed in scan order for entropy coding. 
553* 554* @param[in] ps_proc_ctxt 555* pointer to the current macro block context 556* 557* @returns u1_cbp_l 558* coded block pattern luma 559* 560* @remarks 561* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order 562* mentioned in h.264 specification 563* 564******************************************************************************* 565*/ 566UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on 567 ( 568 process_ctxt_t *ps_proc 569 ); 570 571/** 572******************************************************************************* 573* 574* @brief performs chroma core coding for intra macro blocks 575* 576* @par Description: 577* If the current MB is to be intra coded with mb type chroma I8x8, the MB is 578* first predicted using intra 8x8 prediction filters. The predicted data is 579* compared with the input for error and the error is transformed. The DC 580* coefficients of each transformed sub blocks are further transformed using 581* Hadamard transform. The resulting coefficients are quantized, packed and sent 582* for entropy coding. 583* 584* @param[in] ps_proc_ctxt 585* pointer to the current macro block context 586* 587* @returns u1_cbp_c 588* coded block pattern chroma 589* 590* @remarks 591* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order 592* mentioned in h.264 specification 593* 594******************************************************************************* 595*/ 596UWORD8 ih264e_code_chroma_intra_macroblock_8x8 597 ( 598 process_ctxt_t *ps_proc 599 ); 600 601/** 602******************************************************************************* 603* @brief performs luma core coding when mode is inter 604* 605* @par Description: 606* If the current mb is to be coded as inter predicted mb,based on the sub mb 607* partitions and corresponding motion vectors generated by ME, prediction is done. 608* Then, error is computed between the input blk and the estimated blk. 
609* This error is transformed ( dct and with out hadamard), quantized. The 610* quantized coefficients are packed in scan order for entropy coding. 611* 612* @param[in] ps_proc_ctxt 613* pointer to the current macro block context 614* 615* @returns u1_cbp_l 616* coded block pattern luma 617* 618* @remarks none 619* 620******************************************************************************* 621*/ 622UWORD8 ih264e_code_luma_inter_macroblock_16x16 623 ( 624 process_ctxt_t *ps_proc 625 ); 626 627/** 628******************************************************************************* 629* @brief performs chroma core coding for inter macro blocks 630* 631* @par Description: 632* If the current mb is to be coded as inter predicted mb, based on the sub mb 633* partitions and corresponding motion vectors generated by ME, prediction is done. 634* Then, error is computed between the input blk and the estimated blk. 635* This error is transformed, quantized. The quantized coefficients 636* are packed in scan order for entropy coding. 637* 638* @param[in] ps_proc_ctxt 639* pointer to the current macro block context 640* 641* @returns u1_cbp_l 642* coded block pattern luma 643* 644* @remarks none 645* 646******************************************************************************* 647*/ 648UWORD8 ih264e_code_chroma_inter_macroblock_8x8 649 ( 650 process_ctxt_t *ps_proc 651 ); 652 653#endif /* IH264E_CORE_CODING_H_ */ 654