ih264e_process.c revision 3749f6f435e79624f72841e866245d84195551cd
1/****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*/ 20 21/** 22******************************************************************************* 23* @file 24* ih264e_process.c 25* 26* @brief 27* Contains functions for codec thread 28* 29* @author 30* Harish 31* 32* @par List of Functions: 33* - ih264e_generate_sps_pps() 34* - ih264e_init_entropy_ctxt() 35* - ih264e_entropy() 36* - ih264e_pack_header_data() 37* - ih264e_update_proc_ctxt() 38* - ih264e_init_proc_ctxt() 39* - ih264e_pad_recon_buffer() 40* - ih264e_dblk_pad_hpel_processing_n_mbs() 41* - ih264e_process() 42* - ih264e_set_rc_pic_params() 43* - ih264e_update_rc_post_enc() 44* - ih264e_process_thread() 45* 46* @remarks 47* None 48* 49******************************************************************************* 50*/ 51 52/*****************************************************************************/ 53/* File Includes */ 54/*****************************************************************************/ 55 56/* System include files */ 57#include <stdio.h> 58#include <stddef.h> 59#include <stdlib.h> 60#include <string.h> 61#include <limits.h> 62#include <assert.h> 63 64/* User include files */ 65#include "ih264_typedefs.h" 66#include "iv2.h" 67#include "ive2.h" 68#include "ih264_defs.h" 69#include "ih264_debug.h" 70#include "ime_distortion_metrics.h" 71#include "ime_defs.h" 72#include "ime_structs.h" 73#include "ih264_error.h" 74#include "ih264_structs.h" 75#include "ih264_trans_quant_itrans_iquant.h" 76#include "ih264_inter_pred_filters.h" 77#include "ih264_mem_fns.h" 78#include "ih264_padding.h" 79#include "ih264_intra_pred_filters.h" 80#include "ih264_deblk_edge_filters.h" 81#include "ih264_cabac_tables.h" 82#include "ih264_platform_macros.h" 83#include "ih264_macros.h" 84#include "ih264_buf_mgr.h" 85#include "ih264e_error.h" 86#include "ih264e_bitstream.h" 87#include "ih264_common_tables.h" 88#include "ih264_list.h" 89#include "ih264e_defs.h" 90#include "irc_cntrl_param.h" 91#include "irc_frame_info_collector.h" 92#include "ih264e_rate_control.h" 93#include "ih264e_cabac_structs.h" 94#include "ih264e_structs.h" 95#include "ih264e_cabac.h" 96#include "ih264e_process.h" 97#include "ithread.h" 98#include "ih264e_intra_modes_eval.h" 99#include "ih264e_encode_header.h" 100#include "ih264e_globals.h" 101#include "ih264e_config.h" 102#include "ih264e_trace.h" 103#include "ih264e_statistics.h" 104#include "ih264_cavlc_tables.h" 105#include "ih264e_cavlc.h" 106#include "ih264e_deblk.h" 107#include "ih264e_me.h" 108#include "ih264e_debug.h" 109#include "ih264e_master.h" 110#include "ih264e_utils.h" 111#include "irc_mem_req_and_acq.h" 112#include "irc_rate_control_api.h" 113#include "ih264e_platform_macros.h" 114#include "ime_statistics.h" 115 116 117/*****************************************************************************/ 118/* Function Definitions */ 119/*****************************************************************************/ 120 121/** 122****************************************************************************** 123* 124* @brief This function generates sps, pps set on request 125* 126* @par Description 127* When the encoder is set in header generation mode, the following function 128* is called. This generates sps and pps headers and returns the control back 129* to caller. 130* 131* @param[in] ps_codec 132* pointer to codec context 133* 134* @return success or failure error code 135* 136****************************************************************************** 137*/ 138IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec) 139{ 140 /* choose between ping-pong process buffer set */ 141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; 142 143 /* entropy ctxt */ 144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy; 145 146 /* Bitstream structure */ 147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 148 149 /* sps */ 150 sps_t *ps_sps = NULL; 151 152 /* pps */ 153 pps_t *ps_pps = NULL; 154 155 /* output buff */ 156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel]; 157 158 159 /********************************************************************/ 160 /* initialize the bit stream buffer */ 161 /********************************************************************/ 162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize); 163 164 /********************************************************************/ 165 /* BEGIN HEADER GENERATION */ 166 /********************************************************************/ 167 /*ps_codec->i4_pps_id ++;*/ 168 ps_codec->i4_pps_id %= MAX_PPS_CNT; 169 170 /*ps_codec->i4_sps_id ++;*/ 171 ps_codec->i4_sps_id %= MAX_SPS_CNT; 172 173 /* populate sps header */ 174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id; 175 ih264e_populate_sps(ps_codec, ps_sps); 176 177 /* populate pps header */ 178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id; 179 ih264e_populate_pps(ps_codec, ps_pps); 180 181 ps_entropy->i4_error_code = IH264E_SUCCESS; 182 183 /* generate sps */ 184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps); 185 186 /* generate pps */ 187 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 188 189 /* queue output buffer */ 190 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset; 191 192 return ps_entropy->i4_error_code; 193} 194 195/** 196******************************************************************************* 197* 198* @brief initialize entropy context. 199* 200* @par Description: 201* Before invoking the call to perform to entropy coding the entropy context 202* associated with the job needs to be initialized. This involves the start 203* mb address, end mb address, slice index and the pointer to location at 204* which the mb residue info and mb header info are packed. 205* 206* @param[in] ps_proc 207* Pointer to the current process context 208* 209* @returns error status 210* 211* @remarks none 212* 213******************************************************************************* 214*/ 215IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc) 216{ 217 /* codec context */ 218 codec_t *ps_codec = ps_proc->ps_codec; 219 220 /* entropy ctxt */ 221 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 222 223 /* start address */ 224 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x; 225 226 /* end address */ 227 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt; 228 229 /* slice index */ 230 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add]; 231 232 /* sof */ 233 /* @ start of frame or start of a new slice, set sof flag */ 234 if (ps_entropy->i4_mb_start_add == 0) 235 { 236 ps_entropy->i4_sof = 1; 237 } 238 239 if (ps_entropy->i4_mb_x == 0) 240 { 241 /* packed mb coeff data */ 242 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 243 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; 244 245 /* packed mb header data */ 246 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 247 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 248 } 249 250 return IH264E_SUCCESS; 251} 252 253/** 254******************************************************************************* 255* 256* @brief entry point for entropy coding 257* 258* @par Description 259* This function calls lower level functions to perform entropy coding for a 260* group (n rows) of mb's. After encoding 1 row of mb's, the function takes 261* back the control, updates the ctxt and calls lower level functions again. 262* This process is repeated till all the rows or group of mb's (which ever is 263* minimum) are coded 264* 265* @param[in] ps_proc 266* process context 267* 268* @returns error status 269* 270* @remarks 271* 272******************************************************************************* 273*/ 274 275IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) 276{ 277 /* codec context */ 278 codec_t *ps_codec = ps_proc->ps_codec; 279 280 /* entropy context */ 281 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 282 283 /* cabac context */ 284 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac; 285 286 /* sps */ 287 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT); 288 289 /* pps */ 290 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT); 291 292 /* slice header */ 293 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT); 294 295 /* slice type */ 296 WORD32 i4_slice_type = ps_proc->i4_slice_type; 297 298 /* Bitstream structure */ 299 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 300 301 /* output buff */ 302 out_buf_t s_out_buf; 303 304 /* proc map */ 305 UWORD8 *pu1_proc_map; 306 307 /* entropy map */ 308 UWORD8 *pu1_entropy_map_curr; 309 310 /* proc base idx */ 311 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1; 312 313 /* temp var */ 314 WORD32 i4_wd_mbs, i4_ht_mbs; 315 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx; 316 WORD32 bitstream_start_offset, bitstream_end_offset; 317 /********************************************************************/ 318 /* BEGIN INIT */ 319 /********************************************************************/ 320 321 /* entropy encode start address */ 322 u4_mb_idx = ps_entropy->i4_mb_start_add; 323 324 /* entropy encode end address */ 325 u4_mb_end_idx = ps_entropy->i4_mb_end_add; 326 327 /* width in mbs */ 328 i4_wd_mbs = ps_entropy->i4_wd_mbs; 329 330 /* height in mbs */ 331 i4_ht_mbs = ps_entropy->i4_ht_mbs; 332 333 /* total mb cnt */ 334 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs; 335 336 /* proc map */ 337 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 338 339 /* entropy map */ 340 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 341 342 /********************************************************************/ 343 /* @ start of frame / slice, */ 344 /* initialize the output buffer, */ 345 /* initialize the bit stream buffer, */ 346 /* check if sps and pps headers have to be generated, */ 347 /* populate and generate slice header */ 348 /********************************************************************/ 349 if (ps_entropy->i4_sof) 350 { 351 /********************************************************************/ 352 /* initialize the output buffer */ 353 /********************************************************************/ 354 s_out_buf = ps_codec->as_out_buf[ctxt_sel]; 355 356 /* is last frame to encode */ 357 s_out_buf.u4_is_last = ps_entropy->u4_is_last; 358 359 /* frame idx */ 360 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high; 361 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low; 362 363 /********************************************************************/ 364 /* initialize the bit stream buffer */ 365 /********************************************************************/ 366 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize); 367 368 /********************************************************************/ 369 /* BEGIN HEADER GENERATION */ 370 /********************************************************************/ 371 if (1 == ps_entropy->i4_gen_header) 372 { 373 /* generate sps */ 374 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps); 375 376 /* generate pps */ 377 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 378 379 /* reset i4_gen_header */ 380 ps_entropy->i4_gen_header = 0; 381 } 382 383 /* populate slice header */ 384 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps); 385 386 /* generate slice header */ 387 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, 388 ps_pps, ps_sps); 389 390 /* once start of frame / slice is done, you can reset it */ 391 /* it is the responsibility of the caller to set this flag */ 392 ps_entropy->i4_sof = 0; 393 394 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 395 { 396 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 397 BITSTREAM_FLUSH(ps_bitstrm); 398 ih264e_init_cabac_ctxt(ps_entropy); 399 } 400 } 401 402 /* begin entropy coding for the mb set */ 403 while (u4_mb_idx < u4_mb_end_idx) 404 { 405 /* init ptrs/indices */ 406 if (ps_entropy->i4_mb_x == i4_wd_mbs) 407 { 408 ps_entropy->i4_mb_y++; 409 ps_entropy->i4_mb_x = 0; 410 411 /* packed mb coeff data */ 412 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 413 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; 414 415 /* packed mb header data */ 416 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 417 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 418 419 /* proc map */ 420 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 421 422 /* entropy map */ 423 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 424 } 425 426 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y); 427 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x); 428 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y); 429 430 /* wait until the curr mb is core coded */ 431 /* The wait for curr mb to be core coded is essential when entropy is launched 432 * as a separate job 433 */ 434 while (1) 435 { 436 volatile UWORD8 *pu1_buf1; 437 WORD32 idx = ps_entropy->i4_mb_x; 438 439 pu1_buf1 = pu1_proc_map + idx; 440 if (*pu1_buf1) 441 break; 442 ithread_yield(); 443 } 444 445 446 /* write mb layer */ 447 ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy); 448 /* Starting bitstream offset for header in bits */ 449 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm); 450 451 /* set entropy map */ 452 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1; 453 454 u4_mb_idx++; 455 ps_entropy->i4_mb_x++; 456 /* check for eof */ 457 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 458 { 459 if (ps_entropy->i4_mb_x < i4_wd_mbs) 460 { 461 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 462 } 463 } 464 465 if (ps_entropy->i4_mb_x == i4_wd_mbs) 466 { 467 /* if slices are enabled */ 468 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) 469 { 470 /* current slice index */ 471 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx; 472 473 /* slice map */ 474 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx; 475 476 /* No need to open a slice at end of frame. The current slice can be closed at the time 477 * of signaling eof flag. 478 */ 479 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx 480 != pu1_slice_idx[u4_mb_idx])) 481 { 482 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 483 { /* mb skip run */ 484 if ((i4_slice_type != ISLICE) 485 && *ps_entropy->pi4_mb_skip_run) 486 { 487 if (*ps_entropy->pi4_mb_skip_run) 488 { 489 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); 490 *ps_entropy->pi4_mb_skip_run = 0; 491 } 492 } 493 /* put rbsp trailing bits for the previous slice */ 494 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 495 } 496 else 497 { 498 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 499 } 500 501 /* update slice header pointer */ 502 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx]; 503 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx; 504 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT); 505 506 /* populate slice header */ 507 ps_entropy->i4_mb_start_add = u4_mb_idx; 508 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, 509 ps_sps); 510 511 /* generate slice header */ 512 ps_entropy->i4_error_code |= ih264e_generate_slice_header( 513 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps); 514 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 515 { 516 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 517 BITSTREAM_FLUSH(ps_bitstrm); 518 ih264e_init_cabac_ctxt(ps_entropy); 519 } 520 } 521 else 522 { 523 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 524 && u4_mb_idx != u4_mb_cnt) 525 { 526 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 527 } 528 } 529 } 530 /* Dont execute any further instructions until store synchronization took place */ 531 DATA_SYNC(); 532 } 533 534 /* Ending bitstream offset for header in bits */ 535 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm); 536 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] += 537 bitstream_end_offset - bitstream_start_offset; 538 } 539 540 /* check for eof */ 541 if (u4_mb_idx == u4_mb_cnt) 542 { 543 /* set end of frame flag */ 544 ps_entropy->i4_eof = 1; 545 } 546 else 547 { 548 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 549 && ps_codec->s_cfg.e_slice_mode 550 != IVE_SLICE_MODE_BLOCKS) 551 { 552 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 553 } 554 } 555 556 if (ps_entropy->i4_eof) 557 { 558 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 559 { 560 /* mb skip run */ 561 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) 562 { 563 if (*ps_entropy->pi4_mb_skip_run) 564 { 565 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, 566 ps_entropy->i4_error_code, "mb skip run"); 567 *ps_entropy->pi4_mb_skip_run = 0; 568 } 569 } 570 /* put rbsp trailing bits */ 571 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 572 } 573 else 574 { 575 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 576 } 577 578 /* update current frame stats to rc library */ 579 { 580 /* number of bytes to stuff */ 581 WORD32 i4_stuff_bytes; 582 583 /* update */ 584 i4_stuff_bytes = ih264e_update_rc_post_enc( 585 ps_codec, ctxt_sel, 586 (ps_proc->ps_codec->i4_poc == 0)); 587 588 /* cbr rc - house keeping */ 589 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) 590 { 591 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0; 592 } 593 else if (i4_stuff_bytes) 594 { 595 /* add filler nal units */ 596 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes); 597 } 598 } 599 600 /* 601 *Frame number is to be incremented only if the current frame is a 602 * reference frame. After each successful frame encode, we increment 603 * frame number by 1 604 */ 605 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] 606 && ps_codec->u4_is_curr_frm_ref) 607 { 608 ps_codec->i4_frame_num++; 609 } 610 /********************************************************************/ 611 /* signal the output */ 612 /********************************************************************/ 613 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 614 ps_entropy->ps_bitstrm->u4_strm_buf_offset; 615 616 DEBUG("entropy status %x", ps_entropy->i4_error_code); 617 } 618 619 /* allow threads to dequeue entropy jobs */ 620 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0; 621 622 return ps_entropy->i4_error_code; 623} 624 625/** 626******************************************************************************* 627* 628* @brief Packs header information of a mb in to a buffer 629* 630* @par Description: 631* After the deciding the mode info of a macroblock, the syntax elements 632* associated with the mb are packed and stored. The entropy thread unpacks 633* this buffer and generates the end bit stream. 634* 635* @param[in] ps_proc 636* Pointer to the current process context 637* 638* @returns error status 639* 640* @remarks none 641* 642******************************************************************************* 643*/ 644IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) 645{ 646 /* curr mb type */ 647 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 648 649 /* pack mb syntax layer of curr mb (used for entropy coding) */ 650 if (u4_mb_type == I4x4) 651 { 652 /* pointer to mb header storage space */ 653 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 654 655 /* temp var */ 656 WORD32 i4, byte; 657 658 /* mb type plus mode */ 659 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type; 660 661 /* cbp */ 662 *pu1_ptr++ = ps_proc->u4_cbp; 663 664 /* mb qp delta */ 665 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 666 667 /* sub mb modes */ 668 for (i4 = 0; i4 < 16; i4 ++) 669 { 670 byte = 0; 671 672 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 673 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 674 { 675 byte |= 1; 676 } 677 else 678 { 679 680 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 681 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 682 { 683 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1); 684 } 685 else 686 { 687 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1; 688 } 689 } 690 691 i4++; 692 693 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 694 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 695 { 696 byte |= 16; 697 } 698 else 699 { 700 701 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 702 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 703 { 704 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5); 705 } 706 else 707 { 708 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5; 709 } 710 } 711 712 *pu1_ptr++ = byte; 713 } 714 715 /* end of mb layer */ 716 ps_proc->pv_mb_header_data = pu1_ptr; 717 } 718 else if (u4_mb_type == I16x16) 719 { 720 /* pointer to mb header storage space */ 721 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 722 723 /* mb type plus mode */ 724 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type; 725 726 /* cbp */ 727 *pu1_ptr++ = ps_proc->u4_cbp; 728 729 /* mb qp delta */ 730 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 731 732 /* end of mb layer */ 733 ps_proc->pv_mb_header_data = pu1_ptr; 734 } 735 else if (u4_mb_type == P16x16) 736 { 737 /* pointer to mb header storage space */ 738 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 739 740 WORD16 *i2_mv_ptr; 741 742 /* mb type plus mode */ 743 *pu1_ptr++ = u4_mb_type; 744 745 /* cbp */ 746 *pu1_ptr++ = ps_proc->u4_cbp; 747 748 /* mb qp delta */ 749 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 750 751 i2_mv_ptr = (WORD16 *)pu1_ptr; 752 753 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 754 755 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 756 757 /* end of mb layer */ 758 ps_proc->pv_mb_header_data = i2_mv_ptr; 759 } 760 else if (u4_mb_type == PSKIP) 761 { 762 /* pointer to mb header storage space */ 763 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 764 765 /* mb type plus mode */ 766 *pu1_ptr++ = u4_mb_type; 767 768 /* end of mb layer */ 769 ps_proc->pv_mb_header_data = pu1_ptr; 770 } 771 else if(u4_mb_type == B16x16) 772 { 773 774 /* pointer to mb header storage space */ 775 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 776 777 WORD16 *i2_mv_ptr; 778 779 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 780 781 /* mb type plus mode */ 782 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 783 784 /* cbp */ 785 *pu1_ptr++ = ps_proc->u4_cbp; 786 787 /* mb qp delta */ 788 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 789 790 /* l0 & l1 me data */ 791 i2_mv_ptr = (WORD16 *)pu1_ptr; 792 793 if (u4_pred_mode != PRED_L1) 794 { 795 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx 796 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 797 798 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy 799 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 800 } 801 if (u4_pred_mode != PRED_L0) 802 { 803 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx 804 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx; 805 806 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy 807 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy; 808 } 809 810 /* end of mb layer */ 811 ps_proc->pv_mb_header_data = i2_mv_ptr; 812 813 } 814 else if(u4_mb_type == BDIRECT) 815 { 816 /* pointer to mb header storage space */ 817 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 818 819 /* mb type plus mode */ 820 *pu1_ptr++ = u4_mb_type; 821 822 /* cbp */ 823 *pu1_ptr++ = ps_proc->u4_cbp; 824 825 /* mb qp delta */ 826 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 827 828 ps_proc->pv_mb_header_data = pu1_ptr; 829 830 } 831 else if(u4_mb_type == BSKIP) 832 { 833 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 834 835 /* pointer to mb header storage space */ 836 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 837 838 /* mb type plus mode */ 839 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 840 841 /* end of mb layer */ 842 ps_proc->pv_mb_header_data = pu1_ptr; 843 } 844 845 return IH264E_SUCCESS; 846} 847 848/** 849******************************************************************************* 850* 851* @brief update process context after encoding an mb. This involves preserving 852* the current mb information for later use, initialize the proc ctxt elements to 853* encode next mb. 854* 855* @par Description: 856* This function performs house keeping tasks after encoding an mb. 857* After encoding an mb, various elements of the process context needs to be 858* updated to encode the next mb. For instance, the source, recon and reference 859* pointers, mb indices have to be adjusted to the next mb. The slice index of 860* the current mb needs to be updated. If mb qp modulation is enabled, then if 861* the qp changes the quant param structure needs to be updated. Also to encoding 862* the next mb, the current mb info is used as part of mode prediction or mv 863* prediction. Hence the current mb info has to preserved at top/top left/left 864* locations. 865* 866* @param[in] ps_proc 867* Pointer to the current process context 868* 869* @returns none 870* 871* @remarks none 872* 873******************************************************************************* 874*/ 875WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) 876{ 877 /* error status */ 878 WORD32 error_status = IH264_SUCCESS; 879 880 /* codec context */ 881 codec_t *ps_codec = ps_proc->ps_codec; 882 883 /* curr mb indices */ 884 WORD32 i4_mb_x = ps_proc->i4_mb_x; 885 WORD32 i4_mb_y = ps_proc->i4_mb_y; 886 887 /* mb syntax elements of neighbors */ 888 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele; 889 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; 890 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele; 891 892 /* curr mb type */ 893 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 894 895 /* curr mb type */ 896 UWORD32 u4_is_intra = ps_proc->u4_is_intra; 897 898 /* width in mbs */ 899 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 900 901 /*height in mbs*/ 902 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs; 903 904 /* proc map */ 905 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs); 906 907 /* deblk context */ 908 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 909 910 /* deblk bs context */ 911 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 912 913 /* top row motion vector info */ 914 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; 915 916 /* top left mb motion vector */ 917 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; 918 919 /* left mb motion vector */ 920 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu; 921 922 /* sub mb modes */ 923 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4); 924 925 /*************************************************************/ 926 /* During MV prediction, when top right mb is not available, */ 927 /* top left mb info. is used for prediction. Hence the curr */ 928 /* top, which will be top left for the next mb needs to be */ 929 /* preserved before updating it with curr mb info. */ 930 /*************************************************************/ 931 932 /* mb type, mb class, csbp */ 933 *ps_top_left_syn = *ps_top_syn; 934 935 if (ps_proc->i4_slice_type != ISLICE) 936 { 937 /*****************************************/ 938 /* update top left with top info results */ 939 /*****************************************/ 940 /* mv */ 941 *ps_top_left_mb_pu = *ps_top_row_pu; 942 } 943 944 /*************************************************/ 945 /* update top and left with curr mb info results */ 946 /*************************************************/ 947 948 /* mb type */ 949 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type; 950 951 /* mb class */ 952 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra; 953 954 /* csbp */ 955 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp; 956 957 /* distortion */ 958 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion; 959 960 if (u4_is_intra) 961 { 962 /* mb / sub mb modes */ 963 if (I16x16 == u4_mb_type) 964 { 965 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode; 966 } 967 else if (I4x4 == u4_mb_type) 968 { 969 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 970 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 971 } 972 else if (I8x8 == u4_mb_type) 973 { 974 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 975 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 976 } 977 978 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE)) 979 { 980 /* mv */ 981 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 982 } 983 984 *ps_proc->pu4_mb_pu_cnt = 1; 985 } 986 else 987 { 988 /* mv */ 989 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 990 } 991 992 /* 993 * Mark that the MB has been coded intra 994 * So that future AIRs can skip it 995 */ 996 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra; 997 998 /**************************************************/ 999 /* pack mb header info. for entropy coding */ 1000 /**************************************************/ 1001 ih264e_pack_header_data(ps_proc); 1002 1003 /* update previous mb qp */ 1004 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp; 1005 1006 /* store qp */ 1007 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1008 1009 /* 1010 * We need to sync the cache to make sure that the nmv content of proc 1011 * is updated to cache properly 1012 */ 1013 DATA_SYNC(); 1014 1015 /* Just before finishing the row, enqueue the job in to entropy queue. 1016 * The master thread depending on its convenience shall dequeue it and 1017 * performs entropy. 1018 * 1019 * WARN !! Placing this block post proc map update can cause queuing of 1020 * entropy jobs in out of order. 1021 */ 1022 if (i4_mb_x == i4_wd_mbs - 1) 1023 { 1024 /* job structures */ 1025 job_t s_job; 1026 1027 /* job class */ 1028 s_job.i4_cmd = CMD_ENTROPY; 1029 1030 /* number of mbs to be processed in the current job */ 1031 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; 1032 1033 /* job start index x */ 1034 s_job.i2_mb_x = 0; 1035 1036 /* job start index y */ 1037 s_job.i2_mb_y = ps_proc->i4_mb_y; 1038 1039 /* proc base idx */ 1040 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ; 1041 1042 /* queue the job */ 1043 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1); 1044 1045 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1)) 1046 ih264_list_terminate(ps_codec->pv_entropy_jobq); 1047 } 1048 1049 /* update proc map */ 1050 pu1_proc_map[i4_mb_x] = 1; 1051 1052 /**************************************************/ 1053 /* update proc ctxt elements for encoding next mb */ 1054 /**************************************************/ 1055 /* update indices */ 1056 i4_mb_x ++; 1057 ps_proc->i4_mb_x = i4_mb_x; 1058 1059 if (ps_proc->i4_mb_x == i4_wd_mbs) 1060 { 1061 ps_proc->i4_mb_y++; 1062 ps_proc->i4_mb_x = 0; 1063 } 1064 1065 /* update slice index */ 1066 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x]; 1067 1068 /* update buffers pointers */ 1069 ps_proc->pu1_src_buf_luma += MB_SIZE; 1070 ps_proc->pu1_rec_buf_luma += MB_SIZE; 1071 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; 1072 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; 1073 1074 /* 1075 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1076 * the stride per MB is MB_SIZE 1077 */ 1078 ps_proc->pu1_src_buf_chroma += MB_SIZE; 1079 ps_proc->pu1_rec_buf_chroma += MB_SIZE; 1080 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; 1081 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; 1082 1083 1084 1085 /* Reset cost, distortion params */ 1086 ps_proc->i4_mb_cost = INT_MAX; 1087 ps_proc->i4_mb_distortion = SHRT_MAX; 1088 1089 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt; 1090 1091 ps_proc->pu4_mb_pu_cnt += 1; 1092 1093 /* Update colocated pu */ 1094 if (ps_proc->i4_slice_type == BSLICE) 1095 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x); 1096 1097 /* deblk ctxts */ 1098 if (ps_proc->u4_disable_deblock_level != 1) 1099 { 1100 /* indices */ 1101 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1102 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1103 1104#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */ 1105 ps_deblk->i4_mb_x ++; 1106 1107 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1108 /* 1109 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1110 * the stride per MB is MB_SIZE 1111 */ 1112 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1113#endif 1114 } 1115 1116 return error_status; 1117} 1118 1119/** 1120******************************************************************************* 1121* 1122* @brief initialize process context. 1123* 1124* @par Description: 1125* Before dispatching the current job to process thread, the process context 1126* associated with the job is initialized. Usually every job aims to encode one 1127* row of mb's. Basing on the row indices provided by the job, the process 1128* context's buffer ptrs, slice indices and other elements that are necessary 1129* during core-coding are initialized. 1130* 1131* @param[in] ps_proc 1132* Pointer to the current process context 1133* 1134* @returns error status 1135* 1136* @remarks none 1137* 1138******************************************************************************* 1139*/ 1140IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) 1141{ 1142 /* codec context */ 1143 codec_t *ps_codec = ps_proc->ps_codec; 1144 1145 /* nmb processing context*/ 1146 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1147 1148 /* indices */ 1149 WORD32 i4_mb_x, i4_mb_y; 1150 1151 /* strides */ 1152 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1153 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1154 1155 /* quant params */ 1156 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1157 1158 /* deblk ctxt */ 1159 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1160 1161 /* deblk bs context */ 1162 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 1163 1164 /* Pointer to mv_buffer of current frame */ 1165 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 1166 1167 /* Pointers for color space conversion */ 1168 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base; 1169 1170 /* Pad the MB to support non standard sizes */ 1171 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd; 1172 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht; 1173 UWORD16 u2_num_rows = MB_SIZE; 1174 WORD32 convert_uv_only; 1175 1176 /********************************************************************/ 1177 /* BEGIN INIT */ 1178 /********************************************************************/ 1179 1180 i4_mb_x = ps_proc->i4_mb_x; 1181 i4_mb_y = ps_proc->i4_mb_y; 1182 1183 /* Number of mbs processed in one loop of process function */ 1184 ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs; 1185 ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs; 1186 1187 /* init buffer pointers */ 1188 convert_uv_only = 1; 1189 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)) 1190 { 1191 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz; 1192 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base; 1193 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE); 1194 convert_uv_only = 0; 1195 } 1196 else 1197 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE); 1198 1199 1200 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE || 1201 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P || 1202 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1203 { 1204 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) || 1205 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU)) 1206 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base; 1207 1208 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE); 1209 } 1210 else 1211 { 1212 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * BLK8x8SIZE); 1213 } 1214 1215 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1216 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1217 1218 /* Tempral back and forward reference buffer */ 1219 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1220 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1221 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1222 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1223 1224 /* 1225 * Do color space conversion 1226 * NOTE : We assume there that the number of MB's to process will not span multiple rows 1227 */ 1228 switch (ps_codec->s_cfg.e_inp_color_fmt) 1229 { 1230 case IV_YUV_420SP_UV: 1231 case IV_YUV_420SP_VU: 1232 /* In case of 420 semi-planar input, copy last few rows to intermediate 1233 buffer as chroma trans functions access one extra byte due to interleaved input. 1234 This data will be padded if required */ 1235 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1236 { 1237 WORD32 num_rows = ps_codec->s_cfg.u4_disp_ht & 0xF; 1238 UWORD8 *pu1_src; 1239 UWORD8 *pu1_dst; 1240 WORD32 i; 1241 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1242 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1243 1244 pu1_dst = ps_proc->pu1_src_buf_luma; 1245 1246 for (i = 0; i < num_rows; i++) 1247 { 1248 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1249 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1250 pu1_dst += ps_proc->i4_src_strd; 1251 } 1252 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1253 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1254 pu1_dst = ps_proc->pu1_src_buf_chroma; 1255 1256 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte 1257 * due to interleaved input 1258 */ 1259 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE); 1260 for (i = 0; i < num_rows; i++) 1261 { 1262 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1263 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1264 pu1_dst += ps_proc->i4_src_strd; 1265 } 1266 1267 } 1268 break; 1269 1270 case IV_YUV_420P : 1271 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1272 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1273 1274 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1275 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1276 1277 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) + 1278 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE); 1279 1280 ps_codec->pf_ih264e_conv_420p_to_420sp( 1281 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base, 1282 ps_proc->pu1_src_buf_luma, 1283 ps_proc->pu1_src_buf_chroma, u2_num_rows, 1284 ps_codec->s_cfg.u4_disp_wd, 1285 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0], 1286 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1], 1287 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2], 1288 ps_proc->i4_src_strd, ps_proc->i4_src_strd, 1289 convert_uv_only); 1290 break; 1291 1292 case IV_YUV_422ILE : 1293 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2) 1294 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1295 1296 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp( 1297 ps_proc->pu1_src_buf_luma, 1298 ps_proc->pu1_src_buf_chroma, 1299 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base, 1300 ps_codec->s_cfg.u4_disp_wd, u2_num_rows, 1301 ps_proc->i4_src_strd, ps_proc->i4_src_strd, 1302 ps_proc->i4_src_strd, 1303 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1); 1304 break; 1305 1306 default: 1307 break; 1308 } 1309 1310 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0) && 1311 (ps_proc->i4_src_strd > (WORD32)ps_codec->s_cfg.u4_disp_wd) ) 1312 { 1313 UWORD32 u4_pad_wd, u4_pad_ht; 1314 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd); 1315 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd); 1316 u4_pad_ht = MB_SIZE; 1317 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1318 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz)); 1319 1320 ih264_pad_right_luma( 1321 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd, 1322 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd); 1323 1324 ih264_pad_right_chroma( 1325 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd, 1326 ps_proc->i4_src_strd, u4_pad_ht / 2, u4_pad_wd); 1327 } 1328 1329 /* pad bottom edge */ 1330 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0) 1331 { 1332 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd, 1333 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz); 1334 1335 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2, 1336 ps_proc->i4_src_strd, ps_proc->i4_src_strd, (u4_pad_bottom_sz / 2)); 1337 } 1338 1339 1340 /* packed mb coeff data */ 1341 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data; 1342 1343 /* packed mb header data */ 1344 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data; 1345 1346 /* slice index */ 1347 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x]; 1348 1349 /*********************************************************************/ 1350 /* ih264e_init_quant_params() routine is called at the pic init level*/ 1351 /* this would have initialized the qp. */ 1352 /* TODO_LATER: currently it is assumed that quant params donot change*/ 1353 /* across mb's. When they do calculate update ps_qp_params accordingly*/ 1354 /*********************************************************************/ 1355 1356 /* init mv buffer ptr */ 1357 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); 1358 1359 /* Init co-located mv buffer */ 1360 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); 1361 1362 if (i4_mb_y == 0) 1363 { 1364 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; 1365 } 1366 else 1367 { 1368 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); 1369 } 1370 1371 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs); 1372 1373 /* mb type */ 1374 ps_proc->u4_mb_type = I16x16; 1375 1376 /* lambda */ 1377 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp]; 1378 1379 /* mb distortion */ 1380 ps_proc->i4_mb_distortion = SHRT_MAX; 1381 1382 if (i4_mb_x == 0) 1383 { 1384 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0; 1385 1386 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0; 1387 1388 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0; 1389 1390 if (i4_mb_y == 0) 1391 { 1392 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t)); 1393 } 1394 } 1395 1396 /* mb cost */ 1397 ps_proc->i4_mb_cost = INT_MAX; 1398 1399 /**********************/ 1400 /* init deblk context */ 1401 /**********************/ 1402 ps_deblk->i4_mb_x = ps_proc->i4_mb_x; 1403 /* deblk lags the current mb proc by 1 row */ 1404 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */ 1405 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */ 1406 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */ 1407 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1; 1408 1409 /* buffer ptrs */ 1410 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE); 1411 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE); 1412 1413 /* init deblk bs context */ 1414 /* mb indices */ 1415 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1416 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1417 1418 /* init n_mb_process context */ 1419 ps_n_mb_ctxt->i4_mb_x = 0; 1420 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y; 1421 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy; 1422 1423 return IH264E_SUCCESS; 1424} 1425 1426/** 1427******************************************************************************* 1428* 1429* @brief This function performs luma & chroma padding 1430* 1431* @par Description: 1432* 1433* @param[in] ps_proc 1434* Process context corresponding to the job 1435* 1436* @param[in] pu1_curr_pic_luma 1437* Pointer to luma buffer 1438* 1439* @param[in] pu1_curr_pic_chroma 1440* Pointer to chroma buffer 1441* 1442* @param[in] i4_mb_x 1443* mb index x 1444* 1445* @param[in] i4_mb_y 1446* mb index y 1447* 1448* @param[in] i4_pad_ht 1449* number of rows to be padded 1450* 1451* @returns error status 1452* 1453* @remarks none 1454* 1455******************************************************************************* 1456*/ 1457IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc, 1458 UWORD8 *pu1_curr_pic_luma, 1459 UWORD8 *pu1_curr_pic_chroma, 1460 WORD32 i4_mb_x, 1461 WORD32 i4_mb_y, 1462 WORD32 i4_pad_ht) 1463{ 1464 /* codec context */ 1465 codec_t *ps_codec = ps_proc->ps_codec; 1466 1467 /* strides */ 1468 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1469 1470 if (i4_mb_x == 0) 1471 { 1472 /* padding left luma */ 1473 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT); 1474 1475 /* padding left chroma */ 1476 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT); 1477 } 1478 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1479 { 1480 /* padding right luma */ 1481 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT); 1482 1483 /* padding right chroma */ 1484 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT); 1485 1486 if (i4_mb_y == ps_proc->i4_ht_mbs - 1) 1487 { 1488 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd); 1489 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd); 1490 1491 /* padding bottom luma */ 1492 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT); 1493 1494 /* padding bottom chroma */ 1495 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1496 } 1497 } 1498 1499 if (i4_mb_y == 0) 1500 { 1501 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma; 1502 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma; 1503 WORD32 wd = MB_SIZE; 1504 1505 if (i4_mb_x == 0) 1506 { 1507 pu1_rec_luma -= PAD_LEFT; 1508 pu1_rec_chroma -= PAD_LEFT; 1509 1510 wd += PAD_LEFT; 1511 } 1512 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1513 { 1514 wd += PAD_RIGHT; 1515 } 1516 1517 /* padding top luma */ 1518 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP); 1519 1520 /* padding top chroma */ 1521 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1)); 1522 } 1523 1524 return IH264E_SUCCESS; 1525} 1526 1527 1528 1529 1530/** 1531******************************************************************************* 1532* 1533* @brief This function performs deblocking, padding and halfpel generation for 1534* 'n' MBs 1535* 1536* @par Description: 1537* 1538* @param[in] ps_proc 1539* Process context corresponding to the job 1540* 1541* @param[in] pu1_curr_pic_luma 1542* Current MB being processed(Luma) 1543* 1544* @param[in] pu1_curr_pic_chroma 1545* Current MB being processed(Chroma) 1546* 1547* @param[in] i4_mb_x 1548* Column value of current MB processed 1549* 1550* @param[in] i4_mb_y 1551* Curent row processed 1552* 1553* @returns error status 1554* 1555* @remarks none 1556* 1557******************************************************************************* 1558*/ 1559IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc, 1560 UWORD8 *pu1_curr_pic_luma, 1561 UWORD8 *pu1_curr_pic_chroma, 1562 WORD32 i4_mb_x, 1563 WORD32 i4_mb_y) 1564{ 1565 /* codec context */ 1566 codec_t *ps_codec = ps_proc->ps_codec; 1567 1568 /* n_mb processing context */ 1569 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1570 1571 /* deblk context */ 1572 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1573 1574 /* strides */ 1575 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1576 1577 /* loop variables */ 1578 WORD32 row, i, j, col; 1579 1580 /* Padding Width */ 1581 UWORD32 u4_pad_wd; 1582 1583 /* deblk_map of the row being deblocked */ 1584 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs; 1585 1586 /* deblk_map_previous row */ 1587 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs; 1588 1589 WORD32 u4_pad_top = 0; 1590 1591 WORD32 u4_deblk_prev_row = 0; 1592 1593 /* Number of mbs to be processed */ 1594 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs; 1595 1596 /* Number of mbs actually processed 1597 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */ 1598 WORD32 i4_n_mb_process_count = 0; 1599 1600 UWORD8 *pu1_pad_bottom_src = NULL; 1601 1602 UWORD8 *pu1_pad_src_luma = NULL; 1603 UWORD8 *pu1_pad_src_chroma = NULL; 1604 1605 if (ps_proc->u4_disable_deblock_level == 1) 1606 { 1607 /* If left most MB is processed, then pad left */ 1608 if (i4_mb_x == 0) 1609 { 1610 /* padding left luma */ 1611 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1612 1613 /* padding left chroma */ 1614 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1615 } 1616 /*last col*/ 1617 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1)) 1618 { 1619 /* padding right luma */ 1620 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1621 1622 /* padding right chroma */ 1623 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1624 } 1625 } 1626 1627 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1))) 1628 { 1629 /* if number of mb's to be processed are less than 'N', go back. 1630 * exception to the above clause is end of row */ 1631 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) ) 1632 { 1633 return IH264E_SUCCESS; 1634 } 1635 else 1636 { 1637 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs); 1638 1639 /* performing deblocking for required number of MBs */ 1640 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1)) 1641 { 1642 u4_deblk_prev_row = 1; 1643 1644 /* checking whether the top rows are deblocked */ 1645 for (col = 0; col < i4_n_mb_process_count; col++) 1646 { 1647 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col]; 1648 } 1649 1650 /* checking whether the top right MB is deblocked */ 1651 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs) 1652 { 1653 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count]; 1654 } 1655 1656 /* Top or Top right MBs not deblocked */ 1657 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0)) 1658 { 1659 return IH264E_SUCCESS; 1660 } 1661 1662 for (row = 0; row < i4_n_mb_process_count; row++) 1663 { 1664 ih264e_deblock_mb(ps_proc, ps_deblk); 1665 1666 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1667 1668 if (ps_deblk->i4_mb_y > 0) 1669 { 1670 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/ 1671 { 1672 /* padding left luma */ 1673 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT); 1674 1675 /* padding left chroma */ 1676 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1677 } 1678 1679 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/ 1680 { 1681 /* padding right luma */ 1682 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1683 1684 /* padding right chroma */ 1685 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1686 } 1687 } 1688 ps_deblk->i4_mb_x++; 1689 1690 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1691 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1692 1693 } 1694 } 1695 else if(i4_mb_y > 0) 1696 { 1697 ps_deblk->i4_mb_x += i4_n_mb_process_count; 1698 1699 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE; 1700 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE; 1701 } 1702 1703 if (i4_mb_y == 2) 1704 { 1705 u4_pad_wd = i4_n_mb_process_count * MB_SIZE; 1706 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE; 1707 1708 if (ps_n_mb_ctxt->i4_mb_x == 0) 1709 { 1710 u4_pad_wd += PAD_LEFT; 1711 u4_pad_top = -PAD_LEFT; 1712 } 1713 1714 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1715 { 1716 u4_pad_wd += PAD_RIGHT; 1717 } 1718 1719 /* padding top luma */ 1720 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP); 1721 1722 /* padding top chroma */ 1723 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1)); 1724 } 1725 1726 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count; 1727 1728 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1729 { 1730 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1731 { 1732 /* Bottom Padding is done in one stretch for the entire width */ 1733 if (ps_proc->u4_disable_deblock_level != 1) 1734 { 1735 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE; 1736 1737 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE; 1738 1739 ps_n_mb_ctxt->i4_mb_x = 0; 1740 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y; 1741 ps_deblk->i4_mb_x = 0; 1742 ps_deblk->i4_mb_y = ps_proc->i4_mb_y; 1743 1744 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */ 1745 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1746 1747 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs; 1748 1749 j = (ps_proc->i4_wd_mbs) / i4_n_mbs; 1750 1751 for (i = 0; i < j; i++) 1752 { 1753 for (col = 0; col < i4_n_mbs; col++) 1754 { 1755 ih264e_deblock_mb(ps_proc, ps_deblk); 1756 1757 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1758 1759 ps_deblk->i4_mb_x++; 1760 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1761 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1762 ps_n_mb_ctxt->i4_mb_x++; 1763 } 1764 } 1765 1766 for (col = 0; col < i4_n_mb_process_count; col++) 1767 { 1768 ih264e_deblock_mb(ps_proc, ps_deblk); 1769 1770 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1771 1772 ps_deblk->i4_mb_x++; 1773 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1774 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1775 ps_n_mb_ctxt->i4_mb_x++; 1776 } 1777 1778 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd; 1779 1780 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd; 1781 1782 /* padding left luma */ 1783 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1784 1785 /* padding left chroma */ 1786 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT); 1787 1788 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1789 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1790 1791 /* padding left luma */ 1792 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1793 1794 /* padding left chroma */ 1795 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT); 1796 1797 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1798 1799 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1800 1801 /* padding right luma */ 1802 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1803 1804 /* padding right chroma */ 1805 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1806 1807 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1808 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1809 1810 /* padding right luma */ 1811 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1812 1813 /* padding right chroma */ 1814 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1815 1816 } 1817 1818 /* In case height is less than 2 MBs pad top */ 1819 if (ps_proc->i4_ht_mbs <= 2) 1820 { 1821 UWORD8 *pu1_pad_top_src; 1822 /* padding top luma */ 1823 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT; 1824 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP); 1825 1826 /* padding top chroma */ 1827 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT; 1828 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1)); 1829 } 1830 1831 /* padding bottom luma */ 1832 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT; 1833 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT); 1834 1835 /* padding bottom chroma */ 1836 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT; 1837 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1838 } 1839 } 1840 } 1841 } 1842 1843 return IH264E_SUCCESS; 1844} 1845 1846 1847/** 1848******************************************************************************* 1849* 1850* @brief This function performs luma & chroma core coding for a set of mb's. 1851* 1852* @par Description: 1853* The mb to be coded is taken and is evaluated over a predefined set of modes 1854* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost 1855* is selected and using intra/inter prediction filters, prediction is carried out. 1856* The deviation between src and pred signal constitutes error signal. This error 1857* signal is transformed (hierarchical transform if necessary) and quantized. The 1858* quantized residue is packed in to entropy buffer for entropy coding. This is 1859* repeated for all the mb's enlisted under the job. 1860* 1861* @param[in] ps_proc 1862* Process context corresponding to the job 1863* 1864* @returns error status 1865* 1866* @remarks none 1867* 1868******************************************************************************* 1869*/ 1870WORD32 ih264e_process(process_ctxt_t *ps_proc) 1871{ 1872 /* error status */ 1873 WORD32 error_status = IH264_SUCCESS; 1874 1875 /* codec context */ 1876 codec_t *ps_codec = ps_proc->ps_codec; 1877 1878 /* cbp luma, chroma */ 1879 UWORD32 u4_cbp_l, u4_cbp_c; 1880 1881 /* width in mbs */ 1882 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 1883 1884 /* loop var */ 1885 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt; 1886 1887 /* valid modes */ 1888 UWORD32 u4_valid_modes = 0; 1889 1890 /* gate threshold */ 1891 WORD32 i4_gate_threshold = 0; 1892 1893 /* is intra */ 1894 WORD32 luma_idx, chroma_idx, is_intra; 1895 1896 /* temp variables */ 1897 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1; 1898 1899 /* list of modes for evaluation */ 1900 if (ps_proc->i4_slice_type == ISLICE) 1901 { 1902 /* enable intra 16x16 */ 1903 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1904 1905 /* enable intra 8x8 */ 1906 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0; 1907 1908 /* enable intra 4x4 */ 1909 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1910 } 1911 else if (ps_proc->i4_slice_type == PSLICE) 1912 { 1913 /* enable intra 16x16 */ 1914 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1915 1916 /* enable intra 4x4 */ 1917 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1918 { 1919 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1920 } 1921 1922 /* enable inter P16x16 */ 1923 u4_valid_modes |= (1 << P16x16); 1924 } 1925 else if (ps_proc->i4_slice_type == BSLICE) 1926 { 1927 /* enable intra 16x16 */ 1928 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1929 1930 /* enable intra 4x4 */ 1931 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1932 { 1933 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1934 } 1935 1936 /* enable inter B16x16 */ 1937 u4_valid_modes |= (1 << B16x16); 1938 } 1939 1940 1941 /* init entropy */ 1942 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x; 1943 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y; 1944 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x); 1945 1946 /* compute recon when : 1947 * 1. current frame is to be used as a reference 1948 * 2. dump recon for bit stream sanity check 1949 */ 1950 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref || 1951 ps_codec->s_cfg.u4_enable_recon; 1952 1953 /* Encode 'n' macroblocks, 1954 * 'n' being the number of mbs dictated by current proc ctxt */ 1955 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++) 1956 { 1957 /* since we have not yet found sad, we have not yet got min sad */ 1958 /* we need to initialize these variables for each MB */ 1959 /* TODO how to get the min sad into the codec */ 1960 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad; 1961 ps_proc->u4_min_sad_reached = 0; 1962 1963 /* mb analysis */ 1964 { 1965 /* temp var */ 1966 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs; 1967 1968 /* force intra refresh ? */ 1969 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) || 1970 (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) || 1971 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt); 1972 1973 /* evaluate inter 16x16 modes */ 1974 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16))) 1975 { 1976 /* compute nmb me */ 1977 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0) 1978 { 1979 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me, 1980 i4_wd_mbs - ps_proc->i4_mb_x)); 1981 } 1982 1983 /* set pointers to ME data appropriately for other modules to use */ 1984 { 1985 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ; 1986 1987 /* get the min sad condition for current mb */ 1988 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 1989 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 1990 1991 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]); 1992 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl); 1993 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]); 1994 1995 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion; 1996 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost; 1997 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 1998 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 1999 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type; 2000 2001 /* get the best sub pel buffer */ 2002 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf; 2003 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd; 2004 } 2005 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2006 } 2007 else 2008 { 2009 /* Derive neighbor availability for the current macroblock */ 2010 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl; 2011 2012 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2013 } 2014 2015 /* 2016 * If air says intra, we need to force the following code path to evaluate intra 2017 * The easy way is just to say that the inter cost is too much 2018 */ 2019 if (!i4_air_enable_inter) 2020 { 2021 ps_proc->u4_min_sad_reached = 0; 2022 ps_proc->i4_mb_cost = INT_MAX; 2023 ps_proc->i4_mb_distortion = INT_MAX; 2024 } 2025 else if (ps_proc->u4_mb_type == PSKIP) 2026 { 2027 goto UPDATE_MB_INFO; 2028 } 2029 2030 /* wait until the proc of [top + 1] mb is computed. 2031 * We wait till the proc dependencies are satisfied */ 2032 if(ps_proc->i4_mb_y > 0) 2033 { 2034 /* proc map */ 2035 UWORD8 *pu1_proc_map_top; 2036 2037 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs); 2038 2039 while (1) 2040 { 2041 volatile UWORD8 *pu1_buf; 2042 WORD32 idx = i4_mb_idx + 1; 2043 2044 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1)); 2045 pu1_buf = pu1_proc_map_top + idx; 2046 if(*pu1_buf) 2047 break; 2048 ithread_yield(); 2049 } 2050 } 2051 2052 /* If we already have the minimum sad, there is no point in searching for sad again */ 2053 if (ps_proc->u4_min_sad_reached == 0) 2054 { 2055 /* intra gating in inter slices */ 2056 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/ 2057 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate) 2058 { 2059 /* distortion of neighboring blocks */ 2060 WORD32 i4_distortion[4]; 2061 2062 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion; 2063 2064 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion; 2065 2066 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion; 2067 2068 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion; 2069 2070 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2; 2071 2072 } 2073 2074 2075 /* If we are going to force intra we need to evaluate intra irrespective of gating */ 2076 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion)) 2077 { 2078 /* evaluate intra 4x4 modes */ 2079 if (u4_valid_modes & (1 << I4x4)) 2080 { 2081 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 2082 { 2083 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc); 2084 } 2085 else 2086 { 2087 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc); 2088 } 2089 } 2090 2091 /* evaluate intra 16x16 modes */ 2092 if (u4_valid_modes & (1 << I16x16)) 2093 { 2094 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc); 2095 } 2096 2097 /* evaluate intra 8x8 modes */ 2098 if (u4_valid_modes & (1 << I8x8)) 2099 { 2100 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2101 } 2102 2103 } 2104 } 2105 } 2106 2107 /* is intra */ 2108 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8) 2109 { 2110 luma_idx = ps_proc->u4_mb_type; 2111 chroma_idx = 0; 2112 is_intra = 1; 2113 2114 /* evaluate chroma blocks for intra */ 2115 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2116 } 2117 else 2118 { 2119 luma_idx = 3; 2120 chroma_idx = 1; 2121 is_intra = 0; 2122 } 2123 ps_proc->u4_is_intra = is_intra; 2124 ps_proc->ps_pu->b1_intra_flag = is_intra; 2125 2126 /* redo MV pred of neighbors in the case intra mb */ 2127 /* TODO : currently called unconditionally, needs to be called only in the case of intra 2128 * to modify neighbors */ 2129 if (ps_proc->i4_slice_type != ISLICE) 2130 { 2131 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type); 2132 } 2133 2134 /* Perform luma mb core coding */ 2135 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc); 2136 2137 /* Perform luma mb core coding */ 2138 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc); 2139 2140 /* coded block pattern */ 2141 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; 2142 2143 if (!ps_proc->u4_is_intra) 2144 { 2145 if (ps_proc->i4_slice_type == BSLICE) 2146 { 2147 if (ih264e_find_bskip_params(ps_proc, PRED_L0)) 2148 { 2149 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP; 2150 } 2151 } 2152 else if(!ps_proc->u4_cbp) 2153 { 2154 if (ih264e_find_pskip_params(ps_proc, PRED_L0)) 2155 { 2156 ps_proc->u4_mb_type = PSKIP; 2157 } 2158 } 2159 } 2160 2161UPDATE_MB_INFO: 2162 2163 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */ 2164 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc); 2165 2166 /**********************************************************************/ 2167 /* if disable deblock level is '0' this implies enable deblocking for */ 2168 /* all edges of all macroblocks with out any restrictions */ 2169 /* */ 2170 /* if disable deblock level is '1' this implies disable deblocking for*/ 2171 /* all edges of all macroblocks with out any restrictions */ 2172 /* */ 2173 /* if disable deblock level is '2' this implies enable deblocking for */ 2174 /* all edges of all macroblocks except edges overlapping with slice */ 2175 /* boundaries. This option is not currently supported by the encoder */ 2176 /* hence the slice map should be of no significance to perform debloc */ 2177 /* king */ 2178 /**********************************************************************/ 2179 2180 if (ps_proc->u4_compute_recon) 2181 { 2182 /* deblk context */ 2183 /* src pointers */ 2184 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma; 2185 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma; 2186 2187 /* src indices */ 2188 UWORD32 i4_mb_x = ps_proc->i4_mb_x; 2189 UWORD32 i4_mb_y = ps_proc->i4_mb_y; 2190 2191 /* compute blocking strength */ 2192 if (ps_proc->u4_disable_deblock_level != 1) 2193 { 2194 ih264e_compute_bs(ps_proc); 2195 } 2196 2197 /* nmb deblocking and hpel and padding */ 2198 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma, 2199 pu1_cur_pic_chroma, i4_mb_x, 2200 i4_mb_y); 2201 } 2202 2203 /* update the context after for coding next mb */ 2204 error_status |= ih264e_update_proc_ctxt(ps_proc); 2205 2206 /* Once the last row is processed, mark the buffer status appropriately */ 2207 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y) 2208 { 2209 /* Pointer to current picture buffer structure */ 2210 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic; 2211 2212 /* Pointer to current picture's mv buffer structure */ 2213 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 2214 2215 /**********************************************************************/ 2216 /* if disable deblock level is '0' this implies enable deblocking for */ 2217 /* all edges of all macroblocks with out any restrictions */ 2218 /* */ 2219 /* if disable deblock level is '1' this implies disable deblocking for*/ 2220 /* all edges of all macroblocks with out any restrictions */ 2221 /* */ 2222 /* if disable deblock level is '2' this implies enable deblocking for */ 2223 /* all edges of all macroblocks except edges overlapping with slice */ 2224 /* boundaries. This option is not currently supported by the encoder */ 2225 /* hence the slice map should be of no significance to perform debloc */ 2226 /* king */ 2227 /**********************************************************************/ 2228 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC); 2229 2230 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC); 2231 2232 if (ps_codec->s_cfg.u4_enable_recon) 2233 { 2234 /* pic cnt */ 2235 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt; 2236 2237 /* rec buffers */ 2238 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic; 2239 2240 /* is last? */ 2241 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last; 2242 2243 /* frame time stamp */ 2244 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high; 2245 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low; 2246 } 2247 2248 } 2249 } 2250 2251 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y); 2252 2253 return error_status; 2254} 2255 2256/** 2257******************************************************************************* 2258* 2259* @brief 2260* Function to update rc context after encoding 2261* 2262* @par Description 2263* This function updates the rate control context after the frame is encoded. 2264* Number of bits consumed by the current frame, frame distortion, frame cost, 2265* number of intra/inter mb's, ... are passed on to rate control context for 2266* updating the rc model. 2267* 2268* @param[in] ps_codec 2269* Handle to codec context 2270* 2271* @param[in] ctxt_sel 2272* frame context selector 2273* 2274* @param[in] pic_cnt 2275* pic count 2276* 2277* @returns i4_stuffing_byte 2278* number of stuffing bytes (if necessary) 2279* 2280* @remarks 2281* 2282******************************************************************************* 2283*/ 2284WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm) 2285{ 2286 /* proc set base idx */ 2287 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0; 2288 2289 /* proc ctxt */ 2290 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base]; 2291 2292 /* frame qp */ 2293 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp; 2294 2295 /* cbr rc return status */ 2296 WORD32 i4_stuffing_byte = 0; 2297 2298 /* current frame stats */ 2299 frame_info_t s_frame_info; 2300 picture_type_e rc_pic_type; 2301 2302 /* temp var */ 2303 WORD32 i, j; 2304 2305 /********************************************************************/ 2306 /* BEGIN INIT */ 2307 /********************************************************************/ 2308 2309 /* init frame info */ 2310 irc_init_frame_info(&s_frame_info); 2311 2312 /* get frame info */ 2313 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) 2314 { 2315 /*****************************************************************/ 2316 /* One frame can be encoded by max of u4_num_cores threads */ 2317 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */ 2318 /* u4_num_cores threads */ 2319 /*****************************************************************/ 2320 for (j = 0; j< MAX_MB_TYPE; j++) 2321 { 2322 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j]; 2323 2324 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j]; 2325 2326 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j]; 2327 } 2328 2329 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum; 2330 2331 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum; 2332 2333 /*****************************************************************/ 2334 /* gather number of residue and header bits consumed by the frame*/ 2335 /*****************************************************************/ 2336 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy); 2337 } 2338 2339 /* get pic type */ 2340 switch (ps_codec->pic_type) 2341 { 2342 case PIC_I: 2343 case PIC_IDR: 2344 rc_pic_type = I_PIC; 2345 break; 2346 case PIC_P: 2347 rc_pic_type = P_PIC; 2348 break; 2349 case PIC_B: 2350 rc_pic_type = B_PIC; 2351 break; 2352 default: 2353 assert(0); 2354 break; 2355 } 2356 2357 /* update rc lib with current frame stats */ 2358 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api, 2359 &(s_frame_info), 2360 ps_codec->s_rate_control.pps_pd_frm_rate, 2361 ps_codec->s_rate_control.pps_time_stamp, 2362 ps_codec->s_rate_control.pps_frame_time, 2363 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), 2364 &rc_pic_type, 2365 i4_is_first_frm, 2366 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], 2367 u1_frame_qp, 2368 &ps_codec->s_rate_control.num_intra_in_prev_frame, 2369 &ps_codec->s_rate_control.i4_avg_activity); 2370 return i4_stuffing_byte; 2371} 2372 2373/** 2374******************************************************************************* 2375* 2376* @brief 2377* entry point of a spawned encoder thread 2378* 2379* @par Description: 2380* The encoder thread dequeues a proc/entropy job from the encoder queue and 2381* calls necessary routines. 2382* 2383* @param[in] pv_proc 2384* Process context corresponding to the thread 2385* 2386* @returns error status 2387* 2388* @remarks 2389* 2390******************************************************************************* 2391*/ 2392WORD32 ih264e_process_thread(void *pv_proc) 2393{ 2394 /* error status */ 2395 IH264_ERROR_T ret = IH264_SUCCESS; 2396 WORD32 error_status = IH264_SUCCESS; 2397 2398 /* proc ctxt */ 2399 process_ctxt_t *ps_proc = pv_proc; 2400 2401 /* codec ctxt */ 2402 codec_t *ps_codec = ps_proc->ps_codec; 2403 2404 /* structure to represent a processing job entry */ 2405 job_t s_job; 2406 2407 /* blocking call : entropy dequeue is non-blocking till all 2408 * the proc jobs are processed */ 2409 WORD32 is_blocking = 0; 2410 2411 /* set affinity */ 2412 ithread_set_affinity(ps_proc->i4_id); 2413 2414 while(1) 2415 { 2416 /* dequeue a job from the entropy queue */ 2417 { 2418 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex); 2419 2420 /* codec context selector */ 2421 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; 2422 2423 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; 2424 2425 /* have the lock */ 2426 if (error == 0) 2427 { 2428 if (*pu4_buf == 0) 2429 { 2430 /* no entropy threads are active, try dequeuing a job from the entropy queue */ 2431 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking); 2432 if (IH264_SUCCESS == ret) 2433 { 2434 *pu4_buf = 1; 2435 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2436 goto WORKER; 2437 } 2438 else if(is_blocking) 2439 { 2440 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2441 break; 2442 } 2443 } 2444 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2445 } 2446 } 2447 2448 /* dequeue a job from the process queue */ 2449 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1); 2450 if (IH264_SUCCESS != ret) 2451 { 2452 if(ps_proc->i4_id) 2453 break; 2454 else 2455 { 2456 is_blocking = 1; 2457 continue; 2458 } 2459 } 2460 2461WORKER: 2462 /* choose appropriate proc context based on proc_base_idx */ 2463 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx]; 2464 2465 switch (s_job.i4_cmd) 2466 { 2467 case CMD_PROCESS: 2468 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt; 2469 ps_proc->i4_mb_x = s_job.i2_mb_x; 2470 ps_proc->i4_mb_y = s_job.i2_mb_y; 2471 2472 /* init process context */ 2473 ih264e_init_proc_ctxt(ps_proc); 2474 2475 /* core code all mbs enlisted under the current job */ 2476 error_status |= ih264e_process(ps_proc); 2477 break; 2478 2479 case CMD_ENTROPY: 2480 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x; 2481 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y; 2482 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt; 2483 2484 /* init entropy */ 2485 ih264e_init_entropy_ctxt(ps_proc); 2486 2487 /* entropy code all mbs enlisted under the current job */ 2488 error_status |= ih264e_entropy(ps_proc); 2489 break; 2490 2491 default: 2492 error_status |= IH264_FAIL; 2493 break; 2494 } 2495 } 2496 2497 /* send error code */ 2498 ps_proc->i4_error_code = error_status; 2499 return ret; 2500} 2501